1 /*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2003 Peter Wemm 9 * All rights reserved. 10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 11 * All rights reserved. 12 * Copyright (c) 2014 Andrew Turner 13 * All rights reserved. 14 * Copyright (c) 2014 The FreeBSD Foundation 15 * All rights reserved. 16 * 17 * This code is derived from software contributed to Berkeley by 18 * the Systems Programming Group of the University of Utah Computer 19 * Science Department and William Jolitz of UUNET Technologies Inc. 20 * 21 * This software was developed by Andrew Turner under sponsorship from 22 * the FreeBSD Foundation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 3. All advertising materials mentioning features or use of this software 33 * must display the following acknowledgement: 34 * This product includes software developed by the University of 35 * California, Berkeley and its contributors. 36 * 4. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53 */ 54 /*- 55 * Copyright (c) 2003 Networks Associates Technology, Inc. 56 * All rights reserved. 57 * 58 * This software was developed for the FreeBSD Project by Jake Burkholder, 59 * Safeport Network Services, and Network Associates Laboratories, the 60 * Security Research Division of Network Associates, Inc. under 61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62 * CHATS research program. 63 * 64 * Redistribution and use in source and binary forms, with or without 65 * modification, are permitted provided that the following conditions 66 * are met: 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. 
Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 */ 85 86 #define AMD64_NPT_AWARE 87 88 #include <sys/cdefs.h> 89 __FBSDID("$FreeBSD$"); 90 91 /* 92 * Manages physical address maps. 93 * 94 * Since the information managed by this module is 95 * also stored by the logical address mapping module, 96 * this module may throw away valid virtual-to-physical 97 * mappings at almost any time. However, invalidations 98 * of virtual-to-physical mappings must be done as 99 * requested. 100 * 101 * In order to cope with hardware architectures which 102 * make virtual-to-physical map invalidates expensive, 103 * this module may delay invalidate or reduced protection 104 * operations until such time as they are actually 105 * necessary. This module is given full information as 106 * to which processors are currently using which maps, 107 * and to when physical maps must be made correct. 108 */ 109 110 #include <sys/param.h> 111 #include <sys/bus.h> 112 #include <sys/systm.h> 113 #include <sys/kernel.h> 114 #include <sys/ktr.h> 115 #include <sys/lock.h> 116 #include <sys/malloc.h> 117 #include <sys/mman.h> 118 #include <sys/msgbuf.h> 119 #include <sys/mutex.h> 120 #include <sys/proc.h> 121 #include <sys/rwlock.h> 122 #include <sys/sx.h> 123 #include <sys/vmem.h> 124 #include <sys/vmmeter.h> 125 #include <sys/sched.h> 126 #include <sys/sysctl.h> 127 #include <sys/_unrhdr.h> 128 #include <sys/smp.h> 129 130 #include <vm/vm.h> 131 #include <vm/vm_param.h> 132 #include <vm/vm_kern.h> 133 #include <vm/vm_page.h> 134 #include <vm/vm_map.h> 135 #include <vm/vm_object.h> 136 #include <vm/vm_extern.h> 137 #include <vm/vm_pageout.h> 138 #include <vm/vm_pager.h> 139 #include <vm/vm_radix.h> 140 #include <vm/vm_reserv.h> 141 #include <vm/uma.h> 142 143 #include <machine/machdep.h> 144 #include <machine/md_var.h> 145 #include <machine/pcb.h> 146 147 #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) 148 #define NUPDE (NPDEPG * NPDEPG) 149 #define NUSERPGTBLS (NUPDE + NPDEPG) 150 151 #if !defined(DIAGNOSTIC) 152 #ifdef __GNUC_GNU_INLINE__ 153 #define PMAP_INLINE __attribute__((__gnu_inline__)) inline 154 #else 155 #define PMAP_INLINE extern inline 156 #endif 157 #else 158 #define PMAP_INLINE 159 #endif 160 161 /* 162 * These are configured by the mair_el1 register. 
This is set up in locore.S 163 */ 164 #define DEVICE_MEMORY 0 165 #define UNCACHED_MEMORY 1 166 #define CACHED_MEMORY 2 167 168 169 #ifdef PV_STATS 170 #define PV_STAT(x) do { x ; } while (0) 171 #else 172 #define PV_STAT(x) do { } while (0) 173 #endif 174 175 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 176 177 #define NPV_LIST_LOCKS MAXCPU 178 179 #define PHYS_TO_PV_LIST_LOCK(pa) \ 180 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 181 182 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 183 struct rwlock **_lockp = (lockp); \ 184 struct rwlock *_new_lock; \ 185 \ 186 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 187 if (_new_lock != *_lockp) { \ 188 if (*_lockp != NULL) \ 189 rw_wunlock(*_lockp); \ 190 *_lockp = _new_lock; \ 191 rw_wlock(*_lockp); \ 192 } \ 193 } while (0) 194 195 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 196 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 197 198 #define RELEASE_PV_LIST_LOCK(lockp) do { \ 199 struct rwlock **_lockp = (lockp); \ 200 \ 201 if (*_lockp != NULL) { \ 202 rw_wunlock(*_lockp); \ 203 *_lockp = NULL; \ 204 } \ 205 } while (0) 206 207 #define VM_PAGE_TO_PV_LIST_LOCK(m) \ 208 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 209 210 struct pmap kernel_pmap_store; 211 212 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 213 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 214 vm_offset_t kernel_vm_end = 0; 215 216 struct msgbuf *msgbufp = NULL; 217 218 static struct rwlock_padalign pvh_global_lock; 219 220 /* 221 * Data for the pv entry allocation mechanism 222 */ 223 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 224 static struct mtx pv_chunks_mutex; 225 static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 226 227 static void free_pv_chunk(struct pv_chunk *pc); 228 static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 229 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 230 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 231 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 232 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 233 vm_offset_t va); 234 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 235 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 236 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 237 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 238 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 239 vm_page_t m, struct rwlock **lockp); 240 241 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 242 struct rwlock **lockp); 243 244 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 245 struct spglist *free); 246 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 247 248 /********************/ 249 /* Inline functions */ 250 /********************/ 251 252 static __inline void 253 pagecopy(void *s, void *d) 254 { 255 256 memcpy(d, s, PAGE_SIZE); 257 } 258 259 static __inline void 260 pagezero(void *p) 261 { 262 263 bzero(p, PAGE_SIZE); 264 } 265 266 #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) 267 #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) 268 #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) 269 270 static __inline pd_entry_t * 271 pmap_l1(pmap_t pmap, vm_offset_t va) 272 { 273 274 return (&pmap->pm_l1[pmap_l1_index(va)]); 275 } 276 277 static __inline 
pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
        pd_entry_t *l2;

        l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK);
        return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l1;

        l1 = pmap_l1(pmap, va);
        if ((*l1 & ATTR_DESCR_MASK) != L1_TABLE)
                return (NULL);

        return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
        pt_entry_t *l3;

        l3 = (pd_entry_t *)PHYS_TO_DMAP(*l2 & ~ATTR_MASK);
        return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l2;

        l2 = pmap_l2(pmap, va);
        if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE)
                return (NULL);

        return (pmap_l2_to_l3(l2, va));
}

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define pmap_set(table, mask) atomic_set_64(table, mask)
#define pmap_load_clear(table) atomic_swap_64(table, 0)
#define pmap_load(table) (*table)

static __inline int
pmap_is_current(pmap_t pmap)
{

        return ((pmap == pmap_kernel()) ||
            (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

        return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

        return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
            ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define PTE_SYNC(pte)   cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that if a page is mapped read/write and has been
 * accessed then it is dirty.
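 *
 * ARMv8.0 provides no hardware dirty bit (hardware dirty state management
 * only arrived with ARMv8.1), so the access flag combined with write
 * permission is used as a conservative approximation.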
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

        return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
            (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        KASSERT(pmap->pm_stats.resident_count >= count,
            ("pmap %p resident count underflow %ld %d", pmap,
            pmap->pm_stats.resident_count, count));
        pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
        pt_entry_t *l2;
        pd_entry_t *l1;

        l1 = (pd_entry_t *)l1pt;
        *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

        /* Check that locore used an L1 table (not a block) mapping */
        KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
            ("Invalid bootstrap L1 table"));
        /* Find the address of the L2 table */
        l2 = (pt_entry_t *)init_pt_va;
        *l2_slot = pmap_l2_index(va);

        return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
        u_int l1_slot, l2_slot;
        pt_entry_t *l2;

        l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

        return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static void
pmap_bootstrap_dmap(vm_offset_t l1pt)
{
        vm_offset_t va;
        vm_paddr_t pa;
        pd_entry_t *l1;
        u_int l1_slot;

        va = DMAP_MIN_ADDRESS;
        l1 = (pd_entry_t *)l1pt;
        l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);

        for (pa = 0; va < DMAP_MAX_ADDRESS;
            pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
                KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

                /*
                 * TODO: Turn the cache on here when we have cache
                 * flushing code.
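                 *
                 * Each pass of this loop installs a single L1 block entry,
                 * so the direct map is assembled from L1_SIZE blocks of
                 * normal, cacheable memory starting at physical address 0.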
                 */
                pmap_load_store(&l1[l1_slot],
                    (pa & ~L1_OFFSET) | ATTR_AF | L1_BLOCK |
                    ATTR_IDX(CACHED_MEMORY));
        }

        cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
        cpu_tlb_flushID();
}

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
        vm_offset_t l2pt;
        vm_paddr_t pa;
        pd_entry_t *l1;
        u_int l1_slot;

        KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

        l1 = (pd_entry_t *)l1pt;
        l1_slot = pmap_l1_index(va);
        l2pt = l2_start;

        for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
                KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

                pa = pmap_early_vtophys(l1pt, l2pt);
                pmap_load_store(&l1[l1_slot],
                    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
                l2pt += PAGE_SIZE;
        }

        /* Clean the L2 page tables */
        memset((void *)l2_start, 0, l2pt - l2_start);
        cpu_dcache_wb_range(l2_start, l2pt - l2_start);

        /* Flush the l1 table to ram */
        cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

        return l2pt;
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
        vm_offset_t l2pt, l3pt;
        vm_paddr_t pa;
        pd_entry_t *l2;
        u_int l2_slot;

        KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

        l2 = pmap_l2(kernel_pmap, va);
        l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
        l2pt = (vm_offset_t)l2;
        l2_slot = pmap_l2_index(va);
        l3pt = l3_start;

        for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
                KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

                pa = pmap_early_vtophys(l1pt, l3pt);
                pmap_load_store(&l2[l2_slot],
                    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
                l3pt += PAGE_SIZE;
        }

        /* Clean the L3 page tables */
        memset((void *)l3_start, 0, l3pt - l3_start);
        cpu_dcache_wb_range(l3_start, l3pt - l3_start);

        cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

        return l3pt;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
        u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
        uint64_t kern_delta;
        pt_entry_t *l2;
        vm_offset_t va, freemempos;
        vm_offset_t dpcpu, msgbufpv;
        vm_paddr_t pa;

        kern_delta = KERNBASE - kernstart;
        physmem = 0;

        printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
        printf("%lx\n", l1pt);
        printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

        /* Set this early so we can use the pagetable walking functions */
        kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
        PMAP_LOCK_INIT(kernel_pmap);

        /*
         * Initialize the global pv list lock.
         */
        rw_init(&pvh_global_lock, "pmap pv global");

        /* Create a direct map region early so we can use it for pa -> va */
        pmap_bootstrap_dmap(l1pt);

        va = KERNBASE;
        pa = KERNBASE - kern_delta;

        /*
         * Start to initialise phys_avail by copying from physmap
         * up to the physical address KERNBASE points at.
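         *
         * physmap[] holds (start, end) pairs describing all discovered
         * physical memory; phys_avail[] is built from it with the pages
         * backing the kernel image carved out, so an entry that straddles
         * the kernel is split around it.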
550 */ 551 map_slot = avail_slot = 0; 552 for (; map_slot < (physmap_idx * 2); map_slot += 2) { 553 if (physmap[map_slot] == physmap[map_slot + 1]) 554 continue; 555 556 if (physmap[map_slot] <= pa && 557 physmap[map_slot + 1] > pa) 558 break; 559 560 phys_avail[avail_slot] = physmap[map_slot]; 561 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 562 physmem += (phys_avail[avail_slot + 1] - 563 phys_avail[avail_slot]) >> PAGE_SHIFT; 564 avail_slot += 2; 565 } 566 567 /* Add the memory before the kernel */ 568 if (physmap[avail_slot] < pa) { 569 phys_avail[avail_slot] = physmap[map_slot]; 570 phys_avail[avail_slot + 1] = pa; 571 physmem += (phys_avail[avail_slot + 1] - 572 phys_avail[avail_slot]) >> PAGE_SHIFT; 573 avail_slot += 2; 574 } 575 used_map_slot = map_slot; 576 577 /* 578 * Read the page table to find out what is already mapped. 579 * This assumes we have mapped a block of memory from KERNBASE 580 * using a single L1 entry. 581 */ 582 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 583 584 /* Sanity check the index, KERNBASE should be the first VA */ 585 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 586 587 /* Find how many pages we have mapped */ 588 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 589 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 590 break; 591 592 /* Check locore used L2 blocks */ 593 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 594 ("Invalid bootstrap L2 table")); 595 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 596 ("Incorrect PA in L2 table")); 597 598 va += L2_SIZE; 599 pa += L2_SIZE; 600 } 601 602 va = roundup2(va, L1_SIZE); 603 604 freemempos = KERNBASE + kernlen; 605 freemempos = roundup2(freemempos, PAGE_SIZE); 606 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 607 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 608 /* And the l3 tables for the early devmap */ 609 freemempos = pmap_bootstrap_l3(l1pt, 610 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 611 612 cpu_tlb_flushID(); 613 614 #define alloc_pages(var, np) \ 615 (var) = freemempos; \ 616 freemempos += (np * PAGE_SIZE); \ 617 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 618 619 /* Allocate dynamic per-cpu area. */ 620 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 621 dpcpu_init((void *)dpcpu, 0); 622 623 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 624 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 625 msgbufp = (void *)msgbufpv; 626 627 virtual_avail = roundup2(freemempos, L1_SIZE); 628 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 629 kernel_vm_end = virtual_avail; 630 631 pa = pmap_early_vtophys(l1pt, freemempos); 632 633 /* Finish initialising physmap */ 634 map_slot = used_map_slot; 635 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 636 map_slot < (physmap_idx * 2); map_slot += 2) { 637 if (physmap[map_slot] == physmap[map_slot + 1]) 638 continue; 639 640 /* Have we used the current range? */ 641 if (physmap[map_slot + 1] <= pa) 642 continue; 643 644 /* Do we need to split the entry? */ 645 if (physmap[map_slot] < pa) { 646 phys_avail[avail_slot] = pa; 647 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 648 } else { 649 phys_avail[avail_slot] = physmap[map_slot]; 650 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 651 } 652 physmem += (phys_avail[avail_slot + 1] - 653 phys_avail[avail_slot]) >> PAGE_SHIFT; 654 655 avail_slot += 2; 656 } 657 phys_avail[avail_slot] = 0; 658 phys_avail[avail_slot + 1] = 0; 659 660 /* 661 * Maxmem isn't the "maximum memory", it's one larger than the 662 * highest page of the physical address space. 
It should be
 * called something like "Maxphyspage".
 */
        Maxmem = atop(phys_avail[avail_slot - 1]);

        cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

        TAILQ_INIT(&m->md.pv_list);
        m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
        int i;

        /*
         * Initialize the pv chunk list mutex.
         */
        mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

        /*
         * Initialize the pool of pv list locks.
         */
        for (i = 0; i < NPV_LIST_LOCKS; i++)
                rw_init(&pv_list_locks[i], "pmap pv list");
}

/*
 * Normal, non-SMP, invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

        sched_pin();
        __asm __volatile(
            "dsb sy             \n"
            "tlbi vaae1is, %0   \n"
            "dsb sy             \n"
            "isb                \n"
            : : "r"(va >> PAGE_SHIFT));
        sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t addr;

        sched_pin();
        sva >>= PAGE_SHIFT;
        eva >>= PAGE_SHIFT;
        __asm __volatile("dsb sy");
        for (addr = sva; addr < eva; addr++) {
                __asm __volatile(
                    "tlbi vaae1is, %0" : : "r"(addr));
        }
        __asm __volatile(
            "dsb sy             \n"
            "isb                \n");
        sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

        sched_pin();
        __asm __volatile(
            "dsb sy             \n"
            "tlbi vmalle1is     \n"
            "dsb sy             \n"
            "isb                \n");
        sched_unpin();
}

/*
 * Routine:     pmap_extract
 * Function:
 *      Extract the physical page address associated
 *      with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l2p, l2;
        pt_entry_t *l3p, l3;
        vm_paddr_t pa;

        pa = 0;
        PMAP_LOCK(pmap);
        /*
         * Start with the l2 table. We are unable to allocate
         * pages in the l1 table.
         */
        l2p = pmap_l2(pmap, va);
        if (l2p != NULL) {
                l2 = *l2p;
                if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
                        l3p = pmap_l2_to_l3(l2p, va);
                        if (l3p != NULL) {
                                l3 = *l3p;

                                if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
                                        pa = (l3 & ~ATTR_MASK) |
                                            (va & L3_OFFSET);
                        }
                } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
                        pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
        }
        PMAP_UNLOCK(pmap);
        return (pa);
}

/*
 * Routine:     pmap_extract_and_hold
 * Function:
 *      Atomically extract and hold the physical page
 *      with the given pmap and virtual address pair
 *      if that mapping permits the given protection.
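 *
 *      If switching to the page lock for the discovered physical address
 *      forces the pmap lock to be dropped (vm_page_pa_tryrelock()), the
 *      lookup is retried, since the mapping may have changed in the
 *      meantime.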
797 */ 798 vm_page_t 799 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 800 { 801 pt_entry_t *l3p, l3; 802 vm_paddr_t pa; 803 vm_page_t m; 804 805 pa = 0; 806 m = NULL; 807 PMAP_LOCK(pmap); 808 retry: 809 l3p = pmap_l3(pmap, va); 810 if (l3p != NULL && (l3 = *l3p) != 0) { 811 if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || 812 ((prot & VM_PROT_WRITE) == 0)) { 813 if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa)) 814 goto retry; 815 m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK); 816 vm_page_hold(m); 817 } 818 } 819 PA_UNLOCK_COND(pa); 820 PMAP_UNLOCK(pmap); 821 return (m); 822 } 823 824 vm_paddr_t 825 pmap_kextract(vm_offset_t va) 826 { 827 pd_entry_t *l2; 828 pt_entry_t *l3; 829 vm_paddr_t pa; 830 831 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 832 pa = DMAP_TO_PHYS(va); 833 } else { 834 l2 = pmap_l2(kernel_pmap, va); 835 if (l2 == NULL) 836 panic("pmap_kextract: No l2"); 837 if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) 838 return ((*l2 & ~ATTR_MASK) | (va & L2_OFFSET)); 839 840 l3 = pmap_l2_to_l3(l2, va); 841 if (l3 == NULL) 842 panic("pmap_kextract: No l3..."); 843 pa = (*l3 & ~ATTR_MASK) | (va & PAGE_MASK); 844 } 845 return (pa); 846 } 847 848 /*************************************************** 849 * Low level mapping routines..... 850 ***************************************************/ 851 852 void 853 pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa) 854 { 855 pt_entry_t *l3; 856 857 KASSERT((pa & L3_OFFSET) == 0, 858 ("pmap_kenter_device: Invalid physical address")); 859 KASSERT((va & L3_OFFSET) == 0, 860 ("pmap_kenter_device: Invalid virtual address")); 861 KASSERT((size & PAGE_MASK) == 0, 862 ("pmap_kenter_device: Mapping is not page-sized")); 863 864 while (size != 0) { 865 l3 = pmap_l3(kernel_pmap, va); 866 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 867 pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_AF | L3_PAGE | 868 ATTR_IDX(DEVICE_MEMORY)); 869 PTE_SYNC(l3); 870 871 va += PAGE_SIZE; 872 pa += PAGE_SIZE; 873 size -= PAGE_SIZE; 874 } 875 } 876 877 /* 878 * Remove a page from the kernel pagetables. 879 * Note: not SMP coherent. 880 */ 881 PMAP_INLINE void 882 pmap_kremove(vm_offset_t va) 883 { 884 pt_entry_t *l3; 885 886 l3 = pmap_l3(kernel_pmap, va); 887 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 888 889 if (pmap_l3_valid_cacheable(pmap_load(l3))) 890 cpu_dcache_wb_range(va, L3_SIZE); 891 pmap_load_clear(l3); 892 PTE_SYNC(l3); 893 } 894 895 void 896 pmap_kremove_device(vm_offset_t va, vm_size_t size) 897 { 898 pt_entry_t *l3; 899 900 KASSERT((va & L3_OFFSET) == 0, 901 ("pmap_kremove_device: Invalid virtual address")); 902 KASSERT((size & PAGE_MASK) == 0, 903 ("pmap_kremove_device: Mapping is not page-sized")); 904 905 while (size != 0) { 906 l3 = pmap_l3(kernel_pmap, va); 907 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 908 pmap_load_clear(l3); 909 PTE_SYNC(l3); 910 911 va += PAGE_SIZE; 912 size -= PAGE_SIZE; 913 } 914 } 915 916 /* 917 * Used to map a range of physical addresses into kernel 918 * virtual address space. 919 * 920 * The value passed in '*virt' is a suggested virtual address for 921 * the mapping. Architectures which can support a direct-mapped 922 * physical to virtual region can return the appropriate address 923 * within that region, leaving '*virt' unchanged. Other 924 * architectures should map the pages starting at '*virt' and 925 * update '*virt' with the first usable address after the mapped 926 * region. 
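 *
 * On arm64 the requested range is already covered by the direct map set up
 * at bootstrap, so the DMAP address of 'start' is returned and '*virt' is
 * left unchanged.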
927 */ 928 vm_offset_t 929 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 930 { 931 return PHYS_TO_DMAP(start); 932 } 933 934 935 /* 936 * Add a list of wired pages to the kva 937 * this routine is only used for temporary 938 * kernel mappings that do not need to have 939 * page modification or references recorded. 940 * Note that old mappings are simply written 941 * over. The page *must* be wired. 942 * Note: SMP coherent. Uses a ranged shootdown IPI. 943 */ 944 void 945 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 946 { 947 pt_entry_t *l3, pa; 948 vm_offset_t va; 949 vm_page_t m; 950 int i; 951 952 va = sva; 953 for (i = 0; i < count; i++) { 954 m = ma[i]; 955 pa = VM_PAGE_TO_PHYS(m) | ATTR_AF | 956 ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE; 957 l3 = pmap_l3(kernel_pmap, va); 958 pmap_load_store(l3, pa); 959 PTE_SYNC(l3); 960 961 va += L3_SIZE; 962 } 963 } 964 965 /* 966 * This routine tears out page mappings from the 967 * kernel -- it is meant only for temporary mappings. 968 * Note: SMP coherent. Uses a ranged shootdown IPI. 969 */ 970 void 971 pmap_qremove(vm_offset_t sva, int count) 972 { 973 vm_offset_t va; 974 975 va = sva; 976 while (count-- > 0) { 977 KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); 978 pmap_kremove(va); 979 va += PAGE_SIZE; 980 } 981 pmap_invalidate_range(kernel_pmap, sva, va); 982 } 983 984 /*************************************************** 985 * Page table page management routines..... 986 ***************************************************/ 987 static __inline void 988 pmap_free_zero_pages(struct spglist *free) 989 { 990 vm_page_t m; 991 992 while ((m = SLIST_FIRST(free)) != NULL) { 993 SLIST_REMOVE_HEAD(free, plinks.s.ss); 994 /* Preserve the page's PG_ZERO setting. */ 995 vm_page_free_toq(m); 996 } 997 } 998 999 /* 1000 * Schedule the specified unused page table page to be freed. Specifically, 1001 * add the page to the specified list of pages that will be released to the 1002 * physical memory manager after the TLB has been updated. 1003 */ 1004 static __inline void 1005 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1006 boolean_t set_PG_ZERO) 1007 { 1008 1009 if (set_PG_ZERO) 1010 m->flags |= PG_ZERO; 1011 else 1012 m->flags &= ~PG_ZERO; 1013 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1014 } 1015 1016 /* 1017 * Decrements a page table page's wire count, which is used to record the 1018 * number of valid page table entries within the page. If the wire count 1019 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1020 * page table page was unmapped and FALSE otherwise. 
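 *
 * Removing the last valid entry in an L3 page drops its wire count to zero;
 * _pmap_unwire_l3() then clears the referencing L2 entry and releases the
 * reference held on the L2 table page, which may in turn cascade up to the
 * L1 entry.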
1021 */ 1022 static inline boolean_t 1023 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1024 { 1025 1026 --m->wire_count; 1027 if (m->wire_count == 0) { 1028 _pmap_unwire_l3(pmap, va, m, free); 1029 return (TRUE); 1030 } else 1031 return (FALSE); 1032 } 1033 1034 static void 1035 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1036 { 1037 1038 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1039 /* 1040 * unmap the page table page 1041 */ 1042 if (m->pindex >= NUPDE) { 1043 /* PD page */ 1044 pd_entry_t *l1; 1045 l1 = pmap_l1(pmap, va); 1046 pmap_load_clear(l1); 1047 PTE_SYNC(l1); 1048 } else { 1049 /* PTE page */ 1050 pd_entry_t *l2; 1051 l2 = pmap_l2(pmap, va); 1052 pmap_load_clear(l2); 1053 PTE_SYNC(l2); 1054 } 1055 pmap_resident_count_dec(pmap, 1); 1056 if (m->pindex < NUPDE) { 1057 /* We just released a PT, unhold the matching PD */ 1058 vm_page_t pdpg; 1059 1060 pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); 1061 pmap_unwire_l3(pmap, va, pdpg, free); 1062 } 1063 1064 /* 1065 * This is a release store so that the ordinary store unmapping 1066 * the page table page is globally performed before TLB shoot- 1067 * down is begun. 1068 */ 1069 atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1070 1071 /* 1072 * Put page on a list so that it is released after 1073 * *ALL* TLB shootdown is done 1074 */ 1075 pmap_add_delayed_free_list(m, free, TRUE); 1076 } 1077 1078 /* 1079 * After removing an l3 entry, this routine is used to 1080 * conditionally free the page, and manage the hold/wire counts. 1081 */ 1082 static int 1083 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1084 struct spglist *free) 1085 { 1086 vm_page_t mpte; 1087 1088 if (va >= VM_MAXUSER_ADDRESS) 1089 return (0); 1090 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1091 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1092 return (pmap_unwire_l3(pmap, va, mpte, free)); 1093 } 1094 1095 void 1096 pmap_pinit0(pmap_t pmap) 1097 { 1098 1099 PMAP_LOCK_INIT(pmap); 1100 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1101 pmap->pm_l1 = kernel_pmap->pm_l1; 1102 } 1103 1104 int 1105 pmap_pinit(pmap_t pmap) 1106 { 1107 vm_paddr_t l1phys; 1108 vm_page_t l1pt; 1109 1110 /* 1111 * allocate the l1 page 1112 */ 1113 while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | 1114 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1115 VM_WAIT; 1116 1117 l1phys = VM_PAGE_TO_PHYS(l1pt); 1118 pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); 1119 1120 if ((l1pt->flags & PG_ZERO) == 0) 1121 pagezero(pmap->pm_l1); 1122 1123 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1124 1125 return (1); 1126 } 1127 1128 /* 1129 * This routine is called if the desired page table page does not exist. 1130 * 1131 * If page table page allocation fails, this routine may sleep before 1132 * returning NULL. It sleeps only if a lock pointer was given. 1133 * 1134 * Note: If a page allocation fails at page table level two or three, 1135 * one or two pages may be held during the wait, only to be released 1136 * afterwards. This conservative approach is easily argued to avoid 1137 * race conditions. 1138 */ 1139 static vm_page_t 1140 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1141 { 1142 vm_page_t m, /*pdppg, */pdpg; 1143 1144 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1145 1146 /* 1147 * Allocate a page table page. 
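         *
         * The ptepindex encodes which page is wanted: values below NUPDE
         * identify L3 (PTE) pages and are derived from the VA's L2 slot,
         * while values of NUPDE and above identify L2 (PD) pages and are
         * derived from the VA's L1 slot.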
1148 */ 1149 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1150 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1151 if (lockp != NULL) { 1152 RELEASE_PV_LIST_LOCK(lockp); 1153 PMAP_UNLOCK(pmap); 1154 rw_runlock(&pvh_global_lock); 1155 VM_WAIT; 1156 rw_rlock(&pvh_global_lock); 1157 PMAP_LOCK(pmap); 1158 } 1159 1160 /* 1161 * Indicate the need to retry. While waiting, the page table 1162 * page may have been allocated. 1163 */ 1164 return (NULL); 1165 } 1166 if ((m->flags & PG_ZERO) == 0) 1167 pmap_zero_page(m); 1168 1169 /* 1170 * Map the pagetable page into the process address space, if 1171 * it isn't already there. 1172 */ 1173 1174 if (ptepindex >= NUPDE) { 1175 pd_entry_t *l1; 1176 vm_pindex_t l1index; 1177 1178 l1index = ptepindex - NUPDE; 1179 l1 = &pmap->pm_l1[l1index]; 1180 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1181 PTE_SYNC(l1); 1182 1183 } else { 1184 vm_pindex_t l1index; 1185 pd_entry_t *l1, *l2; 1186 1187 l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); 1188 l1 = &pmap->pm_l1[l1index]; 1189 if (*l1 == 0) { 1190 /* recurse for allocating page dir */ 1191 if (_pmap_alloc_l3(pmap, NUPDE + l1index, 1192 lockp) == NULL) { 1193 --m->wire_count; 1194 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1195 vm_page_free_zero(m); 1196 return (NULL); 1197 } 1198 } else { 1199 pdpg = PHYS_TO_VM_PAGE(*l1 & ~ATTR_MASK); 1200 pdpg->wire_count++; 1201 } 1202 1203 l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK); 1204 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1205 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | ATTR_AF | 1206 ATTR_IDX(CACHED_MEMORY) | L2_TABLE); 1207 PTE_SYNC(l2); 1208 } 1209 1210 pmap_resident_count_inc(pmap, 1); 1211 1212 return (m); 1213 } 1214 1215 static vm_page_t 1216 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1217 { 1218 vm_pindex_t ptepindex; 1219 pd_entry_t *l2; 1220 vm_page_t m; 1221 1222 /* 1223 * Calculate pagetable page index 1224 */ 1225 ptepindex = pmap_l2_pindex(va); 1226 retry: 1227 /* 1228 * Get the page directory entry 1229 */ 1230 l2 = pmap_l2(pmap, va); 1231 1232 /* 1233 * If the page table page is mapped, we just increment the 1234 * hold count, and activate it. 1235 */ 1236 if (l2 != NULL && *l2 != 0) { 1237 m = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK); 1238 m->wire_count++; 1239 } else { 1240 /* 1241 * Here if the pte page isn't mapped, or if it has been 1242 * deallocated. 1243 */ 1244 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1245 if (m == NULL && lockp != NULL) 1246 goto retry; 1247 } 1248 /* 1249 * XXXARM64: I'm not sure why we need this but it fixes a crash 1250 * when running things from a shell script. 1251 */ 1252 pmap_invalidate_all(pmap); 1253 return (m); 1254 } 1255 1256 1257 /*************************************************** 1258 * Pmap allocation/deallocation routines. 1259 ***************************************************/ 1260 1261 /* 1262 * Release any resources held by the given physical map. 1263 * Called when a pmap initialized by pmap_pinit is being released. 1264 * Should only be called if the map contains no valid mappings. 
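 *
 * With a resident count of zero every page table page has already been
 * freed, so the only page left to release is the L1 table itself.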
1265 */ 1266 void 1267 pmap_release(pmap_t pmap) 1268 { 1269 vm_page_t m; 1270 1271 KASSERT(pmap->pm_stats.resident_count == 0, 1272 ("pmap_release: pmap resident count %ld != 0", 1273 pmap->pm_stats.resident_count)); 1274 1275 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); 1276 1277 m->wire_count--; 1278 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1279 vm_page_free_zero(m); 1280 } 1281 1282 #if 0 1283 static int 1284 kvm_size(SYSCTL_HANDLER_ARGS) 1285 { 1286 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1287 1288 return sysctl_handle_long(oidp, &ksize, 0, req); 1289 } 1290 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1291 0, 0, kvm_size, "LU", "Size of KVM"); 1292 1293 static int 1294 kvm_free(SYSCTL_HANDLER_ARGS) 1295 { 1296 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1297 1298 return sysctl_handle_long(oidp, &kfree, 0, req); 1299 } 1300 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1301 0, 0, kvm_free, "LU", "Amount of KVM free"); 1302 #endif /* 0 */ 1303 1304 /* 1305 * grow the number of kernel page table entries, if needed 1306 */ 1307 void 1308 pmap_growkernel(vm_offset_t addr) 1309 { 1310 vm_paddr_t paddr; 1311 vm_page_t nkpg; 1312 pd_entry_t *l1, *l2; 1313 1314 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1315 1316 addr = roundup2(addr, L2_SIZE); 1317 if (addr - 1 >= kernel_map->max_offset) 1318 addr = kernel_map->max_offset; 1319 while (kernel_vm_end < addr) { 1320 l1 = pmap_l1(kernel_pmap, kernel_vm_end); 1321 if (*l1 == 0) { 1322 /* We need a new PDP entry */ 1323 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1324 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1325 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1326 if (nkpg == NULL) 1327 panic("pmap_growkernel: no memory to grow kernel"); 1328 if ((nkpg->flags & PG_ZERO) == 0) 1329 pmap_zero_page(nkpg); 1330 paddr = VM_PAGE_TO_PHYS(nkpg); 1331 pmap_load_store(l1, paddr | L1_TABLE); 1332 PTE_SYNC(l1); 1333 continue; /* try again */ 1334 } 1335 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1336 if ((*l2 & ATTR_AF) != 0) { 1337 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1338 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1339 kernel_vm_end = kernel_map->max_offset; 1340 break; 1341 } 1342 continue; 1343 } 1344 1345 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1346 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1347 VM_ALLOC_ZERO); 1348 if (nkpg == NULL) 1349 panic("pmap_growkernel: no memory to grow kernel"); 1350 if ((nkpg->flags & PG_ZERO) == 0) 1351 pmap_zero_page(nkpg); 1352 paddr = VM_PAGE_TO_PHYS(nkpg); 1353 pmap_load_store(l2, paddr | L2_TABLE); 1354 PTE_SYNC(l2); 1355 1356 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1357 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1358 kernel_vm_end = kernel_map->max_offset; 1359 break; 1360 } 1361 } 1362 } 1363 1364 1365 /*************************************************** 1366 * page management routines. 
1367 ***************************************************/ 1368 1369 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1370 CTASSERT(_NPCM == 3); 1371 CTASSERT(_NPCPV == 168); 1372 1373 static __inline struct pv_chunk * 1374 pv_to_chunk(pv_entry_t pv) 1375 { 1376 1377 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1378 } 1379 1380 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1381 1382 #define PC_FREE0 0xfffffffffffffffful 1383 #define PC_FREE1 0xfffffffffffffffful 1384 #define PC_FREE2 0x000000fffffffffful 1385 1386 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1387 1388 #if 0 1389 #ifdef PV_STATS 1390 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1391 1392 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1393 "Current number of pv entry chunks"); 1394 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1395 "Current number of pv entry chunks allocated"); 1396 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1397 "Current number of pv entry chunks frees"); 1398 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1399 "Number of times tried to get a chunk page but failed."); 1400 1401 static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1402 static int pv_entry_spare; 1403 1404 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1405 "Current number of pv entry frees"); 1406 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1407 "Current number of pv entry allocs"); 1408 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1409 "Current number of pv entries"); 1410 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1411 "Current number of spare pv entries"); 1412 #endif 1413 #endif /* 0 */ 1414 1415 /* 1416 * We are in a serious low memory condition. Resort to 1417 * drastic measures to free some pages so we can allocate 1418 * another pv entry chunk. 1419 * 1420 * Returns NULL if PV entries were reclaimed from the specified pmap. 1421 * 1422 * We do not, however, unmap 2mpages because subsequent accesses will 1423 * allocate per-page pv entries until repromotion occurs, thereby 1424 * exacerbating the shortage of free pv entries. 1425 */ 1426 static vm_page_t 1427 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1428 { 1429 1430 panic("reclaim_pv_chunk"); 1431 } 1432 1433 /* 1434 * free the pv_entry back to the free list 1435 */ 1436 static void 1437 free_pv_entry(pmap_t pmap, pv_entry_t pv) 1438 { 1439 struct pv_chunk *pc; 1440 int idx, field, bit; 1441 1442 rw_assert(&pvh_global_lock, RA_LOCKED); 1443 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1444 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1445 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1446 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1447 pc = pv_to_chunk(pv); 1448 idx = pv - &pc->pc_pventry[0]; 1449 field = idx / 64; 1450 bit = idx % 64; 1451 pc->pc_map[field] |= 1ul << bit; 1452 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1453 pc->pc_map[2] != PC_FREE2) { 1454 /* 98% of the time, pc is already at the head of the list. 
*/ 1455 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1456 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1457 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1458 } 1459 return; 1460 } 1461 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1462 free_pv_chunk(pc); 1463 } 1464 1465 static void 1466 free_pv_chunk(struct pv_chunk *pc) 1467 { 1468 vm_page_t m; 1469 1470 mtx_lock(&pv_chunks_mutex); 1471 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1472 mtx_unlock(&pv_chunks_mutex); 1473 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1474 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1475 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1476 /* entire chunk is free, return it */ 1477 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1478 #if 0 /* TODO: For minidump */ 1479 dump_drop_page(m->phys_addr); 1480 #endif 1481 vm_page_unwire(m, PQ_INACTIVE); 1482 vm_page_free(m); 1483 } 1484 1485 /* 1486 * Returns a new PV entry, allocating a new PV chunk from the system when 1487 * needed. If this PV chunk allocation fails and a PV list lock pointer was 1488 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1489 * returned. 1490 * 1491 * The given PV list lock may be released. 1492 */ 1493 static pv_entry_t 1494 get_pv_entry(pmap_t pmap, struct rwlock **lockp) 1495 { 1496 int bit, field; 1497 pv_entry_t pv; 1498 struct pv_chunk *pc; 1499 vm_page_t m; 1500 1501 rw_assert(&pvh_global_lock, RA_LOCKED); 1502 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1503 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1504 retry: 1505 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1506 if (pc != NULL) { 1507 for (field = 0; field < _NPCM; field++) { 1508 if (pc->pc_map[field]) { 1509 bit = ffsl(pc->pc_map[field]) - 1; 1510 break; 1511 } 1512 } 1513 if (field < _NPCM) { 1514 pv = &pc->pc_pventry[field * 64 + bit]; 1515 pc->pc_map[field] &= ~(1ul << bit); 1516 /* If this was the last item, move it to tail */ 1517 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1518 pc->pc_map[2] == 0) { 1519 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1520 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1521 pc_list); 1522 } 1523 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1524 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1525 return (pv); 1526 } 1527 } 1528 /* No free items, allocate another chunk */ 1529 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1530 VM_ALLOC_WIRED); 1531 if (m == NULL) { 1532 if (lockp == NULL) { 1533 PV_STAT(pc_chunk_tryfail++); 1534 return (NULL); 1535 } 1536 m = reclaim_pv_chunk(pmap, lockp); 1537 if (m == NULL) 1538 goto retry; 1539 } 1540 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1541 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1542 #if 0 /* TODO: This is for minidump */ 1543 dump_add_page(m->phys_addr); 1544 #endif 1545 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1546 pc->pc_pmap = pmap; 1547 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1548 pc->pc_map[1] = PC_FREE1; 1549 pc->pc_map[2] = PC_FREE2; 1550 mtx_lock(&pv_chunks_mutex); 1551 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1552 mtx_unlock(&pv_chunks_mutex); 1553 pv = &pc->pc_pventry[0]; 1554 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1555 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1556 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1557 return (pv); 1558 } 1559 1560 /* 1561 * First find and then remove the pv entry for the specified pmap and virtual 1562 * address from the specified pv list. Returns the pv entry if found and NULL 1563 * otherwise. 
This operation can be performed on pv lists for either 4KB or 1564 * 2MB page mappings. 1565 */ 1566 static __inline pv_entry_t 1567 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1568 { 1569 pv_entry_t pv; 1570 1571 rw_assert(&pvh_global_lock, RA_LOCKED); 1572 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1573 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1574 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1575 pvh->pv_gen++; 1576 break; 1577 } 1578 } 1579 return (pv); 1580 } 1581 1582 /* 1583 * First find and then destroy the pv entry for the specified pmap and virtual 1584 * address. This operation can be performed on pv lists for either 4KB or 2MB 1585 * page mappings. 1586 */ 1587 static void 1588 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1589 { 1590 pv_entry_t pv; 1591 1592 pv = pmap_pvh_remove(pvh, pmap, va); 1593 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1594 free_pv_entry(pmap, pv); 1595 } 1596 1597 /* 1598 * Conditionally create the PV entry for a 4KB page mapping if the required 1599 * memory can be allocated without resorting to reclamation. 1600 */ 1601 static boolean_t 1602 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1603 struct rwlock **lockp) 1604 { 1605 pv_entry_t pv; 1606 1607 rw_assert(&pvh_global_lock, RA_LOCKED); 1608 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1609 /* Pass NULL instead of the lock pointer to disable reclamation. */ 1610 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 1611 pv->pv_va = va; 1612 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1613 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 1614 m->md.pv_gen++; 1615 return (TRUE); 1616 } else 1617 return (FALSE); 1618 } 1619 1620 /* 1621 * pmap_remove_l3: do the things to unmap a page in a process 1622 */ 1623 static int 1624 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 1625 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 1626 { 1627 pt_entry_t old_l3; 1628 vm_page_t m; 1629 1630 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1631 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 1632 cpu_dcache_wb_range(va, L3_SIZE); 1633 old_l3 = pmap_load_clear(l3); 1634 PTE_SYNC(l3); 1635 if (old_l3 & ATTR_SW_WIRED) 1636 pmap->pm_stats.wired_count -= 1; 1637 pmap_resident_count_dec(pmap, 1); 1638 if (old_l3 & ATTR_SW_MANAGED) { 1639 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 1640 if (pmap_page_dirty(old_l3)) 1641 vm_page_dirty(m); 1642 if (old_l3 & ATTR_AF) 1643 vm_page_aflag_set(m, PGA_REFERENCED); 1644 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1645 pmap_pvh_free(&m->md, pmap, va); 1646 } 1647 return (pmap_unuse_l3(pmap, va, l2e, free)); 1648 } 1649 1650 /* 1651 * Remove the given range of addresses from the specified map. 1652 * 1653 * It is assumed that the start and end are properly 1654 * rounded to the page size. 1655 */ 1656 void 1657 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1658 { 1659 struct rwlock *lock; 1660 vm_offset_t va, va_next; 1661 pd_entry_t *l1, *l2; 1662 pt_entry_t l3_paddr, *l3; 1663 struct spglist free; 1664 int anyvalid; 1665 1666 /* 1667 * Perform an unsynchronized read. This is, however, safe. 
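         *
         * The resident count is only used to skip the walk entirely when
         * the pmap is known to be empty; a stale value at worst costs an
         * unnecessary pass over the page tables.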
1668 */ 1669 if (pmap->pm_stats.resident_count == 0) 1670 return; 1671 1672 anyvalid = 0; 1673 SLIST_INIT(&free); 1674 1675 rw_rlock(&pvh_global_lock); 1676 PMAP_LOCK(pmap); 1677 1678 lock = NULL; 1679 for (; sva < eva; sva = va_next) { 1680 1681 if (pmap->pm_stats.resident_count == 0) 1682 break; 1683 1684 l1 = pmap_l1(pmap, sva); 1685 if (*l1 == 0) { 1686 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1687 if (va_next < sva) 1688 va_next = eva; 1689 continue; 1690 } 1691 1692 /* 1693 * Calculate index for next page table. 1694 */ 1695 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1696 if (va_next < sva) 1697 va_next = eva; 1698 1699 l2 = pmap_l1_to_l2(l1, sva); 1700 if (l2 == NULL) 1701 continue; 1702 1703 l3_paddr = *l2; 1704 1705 /* 1706 * Weed out invalid mappings. 1707 */ 1708 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 1709 continue; 1710 1711 /* 1712 * Limit our scan to either the end of the va represented 1713 * by the current page table page, or to the end of the 1714 * range being removed. 1715 */ 1716 if (va_next > eva) 1717 va_next = eva; 1718 1719 va = va_next; 1720 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 1721 sva += L3_SIZE) { 1722 if (l3 == NULL) 1723 panic("l3 == NULL"); 1724 if (*l3 == 0) { 1725 if (va != va_next) { 1726 pmap_invalidate_range(pmap, va, sva); 1727 va = va_next; 1728 } 1729 continue; 1730 } 1731 if (va == va_next) 1732 va = sva; 1733 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 1734 &lock)) { 1735 sva += L3_SIZE; 1736 break; 1737 } 1738 } 1739 if (va != va_next) 1740 pmap_invalidate_range(pmap, va, sva); 1741 } 1742 if (lock != NULL) 1743 rw_wunlock(lock); 1744 if (anyvalid) 1745 pmap_invalidate_all(pmap); 1746 rw_runlock(&pvh_global_lock); 1747 PMAP_UNLOCK(pmap); 1748 pmap_free_zero_pages(&free); 1749 } 1750 1751 /* 1752 * Routine: pmap_remove_all 1753 * Function: 1754 * Removes this physical page from 1755 * all physical maps in which it resides. 1756 * Reflects back modify bits to the pager. 1757 * 1758 * Notes: 1759 * Original versions of this routine were very 1760 * inefficient because they iteratively called 1761 * pmap_remove (slow...) 1762 */ 1763 1764 void 1765 pmap_remove_all(vm_page_t m) 1766 { 1767 pv_entry_t pv; 1768 pmap_t pmap; 1769 pt_entry_t *l3, tl3; 1770 pd_entry_t *l2; 1771 struct spglist free; 1772 1773 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1774 ("pmap_remove_all: page %p is not managed", m)); 1775 SLIST_INIT(&free); 1776 rw_wlock(&pvh_global_lock); 1777 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1778 pmap = PV_PMAP(pv); 1779 PMAP_LOCK(pmap); 1780 pmap_resident_count_dec(pmap, 1); 1781 l2 = pmap_l2(pmap, pv->pv_va); 1782 KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE, 1783 ("pmap_remove_all: found a table when expecting " 1784 "a block in %p's pv list", m)); 1785 l3 = pmap_l2_to_l3(l2, pv->pv_va); 1786 if (pmap_is_current(pmap) && 1787 pmap_l3_valid_cacheable(pmap_load(l3))) 1788 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 1789 tl3 = pmap_load_clear(l3); 1790 PTE_SYNC(l3); 1791 if (tl3 & ATTR_SW_WIRED) 1792 pmap->pm_stats.wired_count--; 1793 if ((tl3 & ATTR_AF) != 0) 1794 vm_page_aflag_set(m, PGA_REFERENCED); 1795 1796 /* 1797 * Update the vm_page_t clean and reference bits. 
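                 *
                 * The modified state captured from the PTE is written back
                 * to the page here, before the pv entry is freed, so it is
                 * not lost to the pageout daemon.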
1798 */ 1799 if (pmap_page_dirty(tl3)) 1800 vm_page_dirty(m); 1801 pmap_unuse_l3(pmap, pv->pv_va, *l2, &free); 1802 pmap_invalidate_page(pmap, pv->pv_va); 1803 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1804 m->md.pv_gen++; 1805 free_pv_entry(pmap, pv); 1806 PMAP_UNLOCK(pmap); 1807 } 1808 vm_page_aflag_clear(m, PGA_WRITEABLE); 1809 rw_wunlock(&pvh_global_lock); 1810 pmap_free_zero_pages(&free); 1811 } 1812 1813 /* 1814 * Set the physical protection on the 1815 * specified range of this map as requested. 1816 */ 1817 void 1818 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1819 { 1820 vm_offset_t va, va_next; 1821 pd_entry_t *l1, *l2; 1822 pt_entry_t *l3p, l3; 1823 1824 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1825 pmap_remove(pmap, sva, eva); 1826 return; 1827 } 1828 1829 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 1830 return; 1831 1832 PMAP_LOCK(pmap); 1833 for (; sva < eva; sva = va_next) { 1834 1835 l1 = pmap_l1(pmap, sva); 1836 if (*l1 == 0) { 1837 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1838 if (va_next < sva) 1839 va_next = eva; 1840 continue; 1841 } 1842 1843 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1844 if (va_next < sva) 1845 va_next = eva; 1846 1847 l2 = pmap_l1_to_l2(l1, sva); 1848 if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE) 1849 continue; 1850 1851 if (va_next > eva) 1852 va_next = eva; 1853 1854 va = va_next; 1855 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 1856 sva += L3_SIZE) { 1857 l3 = pmap_load(l3p); 1858 if (pmap_l3_valid(l3)) { 1859 pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); 1860 PTE_SYNC(l3p); 1861 } 1862 } 1863 } 1864 PMAP_UNLOCK(pmap); 1865 1866 /* TODO: Only invalidate entries we are touching */ 1867 pmap_invalidate_all(pmap); 1868 } 1869 1870 /* 1871 * Insert the given physical page (p) at 1872 * the specified virtual address (v) in the 1873 * target physical map with the protection requested. 1874 * 1875 * If specified, the page will be wired down, meaning 1876 * that the related pte can not be reclaimed. 1877 * 1878 * NB: This is the only routine which MAY NOT lazy-evaluate 1879 * or lose information. That is, this routine must actually 1880 * insert this page into the given map NOW. 1881 */ 1882 int 1883 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1884 u_int flags, int8_t psind __unused) 1885 { 1886 struct rwlock *lock; 1887 pd_entry_t *l1, *l2; 1888 pt_entry_t new_l3, orig_l3; 1889 pt_entry_t *l3; 1890 pv_entry_t pv; 1891 vm_paddr_t opa, pa, l2_pa, l3_pa; 1892 vm_page_t mpte, om, l2_m, l3_m; 1893 boolean_t nosleep; 1894 1895 va = trunc_page(va); 1896 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 1897 VM_OBJECT_ASSERT_LOCKED(m->object); 1898 pa = VM_PAGE_TO_PHYS(m); 1899 new_l3 = (pt_entry_t)(pa | ATTR_AF | L3_PAGE); 1900 if ((prot & VM_PROT_WRITE) == 0) 1901 new_l3 |= ATTR_AP(ATTR_AP_RO); 1902 if ((flags & PMAP_ENTER_WIRED) != 0) 1903 new_l3 |= ATTR_SW_WIRED; 1904 if ((va >> 63) == 0) 1905 new_l3 |= ATTR_AP(ATTR_AP_USER); 1906 new_l3 |= ATTR_IDX(m->md.pv_memattr); 1907 1908 mpte = NULL; 1909 1910 lock = NULL; 1911 rw_rlock(&pvh_global_lock); 1912 PMAP_LOCK(pmap); 1913 1914 if (va < VM_MAXUSER_ADDRESS) { 1915 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 1916 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 1917 if (mpte == NULL && nosleep) { 1918 if (lock != NULL) 1919 rw_wunlock(lock); 1920 rw_runlock(&pvh_global_lock); 1921 PMAP_UNLOCK(pmap); 1922 return (KERN_RESOURCE_SHORTAGE); 1923 } 1924 l3 = pmap_l3(pmap, va); 1925 } else { 1926 l3 = pmap_l3(pmap, va); 1927 /* TODO: This is not optimal, but should mostly work */ 1928 if (l3 == NULL) { 1929 l2 = pmap_l2(pmap, va); 1930 1931 if (l2 == NULL) { 1932 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1933 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1934 VM_ALLOC_ZERO); 1935 if (l2_m == NULL) 1936 panic("pmap_enter: l2 pte_m == NULL"); 1937 if ((l2_m->flags & PG_ZERO) == 0) 1938 pmap_zero_page(l2_m); 1939 1940 l2_pa = VM_PAGE_TO_PHYS(l2_m); 1941 l1 = pmap_l1(pmap, va); 1942 pmap_load_store(l1, l2_pa | L1_TABLE); 1943 PTE_SYNC(l1); 1944 l2 = pmap_l1_to_l2(l1, va); 1945 } 1946 1947 KASSERT(l2 != NULL, 1948 ("No l2 table after allocating one")); 1949 1950 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1951 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1952 if (l3_m == NULL) 1953 panic("pmap_enter: l3 pte_m == NULL"); 1954 if ((l3_m->flags & PG_ZERO) == 0) 1955 pmap_zero_page(l3_m); 1956 1957 l3_pa = VM_PAGE_TO_PHYS(l3_m); 1958 pmap_load_store(l2, l3_pa | L2_TABLE); 1959 PTE_SYNC(l2); 1960 l3 = pmap_l2_to_l3(l2, va); 1961 } 1962 } 1963 1964 om = NULL; 1965 orig_l3 = pmap_load(l3); 1966 opa = orig_l3 & ~ATTR_MASK; 1967 1968 /* 1969 * Is the specified virtual address already mapped? 1970 */ 1971 if (pmap_l3_valid(orig_l3)) { 1972 /* 1973 * Wiring change, just update stats. We don't worry about 1974 * wiring PT pages as they remain resident as long as there 1975 * are valid mappings in them. Hence, if a user page is wired, 1976 * the PT page will be also. 1977 */ 1978 if ((flags & PMAP_ENTER_WIRED) != 0 && 1979 (orig_l3 & ATTR_SW_WIRED) == 0) 1980 pmap->pm_stats.wired_count++; 1981 else if ((flags & PMAP_ENTER_WIRED) == 0 && 1982 (orig_l3 & ATTR_SW_WIRED) != 0) 1983 pmap->pm_stats.wired_count--; 1984 1985 /* 1986 * Remove the extra PT page reference. 1987 */ 1988 if (mpte != NULL) { 1989 mpte->wire_count--; 1990 KASSERT(mpte->wire_count > 0, 1991 ("pmap_enter: missing reference to page table page," 1992 " va: 0x%lx", va)); 1993 } 1994 1995 /* 1996 * Has the physical page changed? 1997 */ 1998 if (opa == pa) { 1999 /* 2000 * No, might be a protection or wiring change. 2001 */ 2002 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2003 new_l3 |= ATTR_SW_MANAGED; 2004 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2005 ATTR_AP(ATTR_AP_RW)) { 2006 vm_page_aflag_set(m, PGA_WRITEABLE); 2007 } 2008 } 2009 goto validate; 2010 } 2011 2012 /* Flush the cache, there might be uncommitted data in it */ 2013 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2014 cpu_dcache_wb_range(va, L3_SIZE); 2015 } else { 2016 /* 2017 * Increment the counters. 2018 */ 2019 if ((new_l3 & ATTR_SW_WIRED) != 0) 2020 pmap->pm_stats.wired_count++; 2021 pmap_resident_count_inc(pmap, 1); 2022 } 2023 /* 2024 * Enter on the PV list if part of our managed memory. 2025 */ 2026 if ((m->oflags & VPO_UNMANAGED) == 0) { 2027 new_l3 |= ATTR_SW_MANAGED; 2028 pv = get_pv_entry(pmap, &lock); 2029 pv->pv_va = va; 2030 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2031 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2032 m->md.pv_gen++; 2033 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2034 vm_page_aflag_set(m, PGA_WRITEABLE); 2035 } 2036 2037 /* 2038 * Update the L3 entry. 
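         *
         * An existing valid entry is replaced with a single atomic swap
         * (pmap_load_store()), so the old access and dirty state can be
         * handed over to the previous page without a window in which the
         * mapping is invalid.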
2039 */ 2040 if (orig_l3 != 0) { 2041 validate: 2042 orig_l3 = pmap_load_store(l3, new_l3); 2043 PTE_SYNC(l3); 2044 opa = orig_l3 & ~ATTR_MASK; 2045 2046 if (opa != pa) { 2047 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2048 om = PHYS_TO_VM_PAGE(opa); 2049 if (pmap_page_dirty(orig_l3)) 2050 vm_page_dirty(om); 2051 if ((orig_l3 & ATTR_AF) != 0) 2052 vm_page_aflag_set(om, PGA_REFERENCED); 2053 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2054 pmap_pvh_free(&om->md, pmap, va); 2055 } 2056 } else if (pmap_page_dirty(orig_l3)) { 2057 if ((orig_l3 & ATTR_SW_MANAGED) != 0) 2058 vm_page_dirty(m); 2059 } 2060 if ((orig_l3 & ATTR_AF) != 0) 2061 pmap_invalidate_page(pmap, va); 2062 } else { 2063 pmap_load_store(l3, new_l3); 2064 PTE_SYNC(l3); 2065 } 2066 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2067 cpu_icache_sync_range(va, PAGE_SIZE); 2068 2069 if (lock != NULL) 2070 rw_wunlock(lock); 2071 rw_runlock(&pvh_global_lock); 2072 PMAP_UNLOCK(pmap); 2073 return (KERN_SUCCESS); 2074 } 2075 2076 /* 2077 * Maps a sequence of resident pages belonging to the same object. 2078 * The sequence begins with the given page m_start. This page is 2079 * mapped at the given virtual address start. Each subsequent page is 2080 * mapped at a virtual address that is offset from start by the same 2081 * amount as the page is offset from m_start within the object. The 2082 * last page in the sequence is the page with the largest offset from 2083 * m_start that can be mapped at a virtual address less than the given 2084 * virtual address end. Not every virtual page between start and end 2085 * is mapped; only those for which a resident page exists with the 2086 * corresponding offset from m_start are mapped. 2087 */ 2088 void 2089 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2090 vm_page_t m_start, vm_prot_t prot) 2091 { 2092 struct rwlock *lock; 2093 vm_offset_t va; 2094 vm_page_t m, mpte; 2095 vm_pindex_t diff, psize; 2096 2097 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2098 2099 psize = atop(end - start); 2100 mpte = NULL; 2101 m = m_start; 2102 lock = NULL; 2103 rw_rlock(&pvh_global_lock); 2104 PMAP_LOCK(pmap); 2105 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2106 va = start + ptoa(diff); 2107 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2108 m = TAILQ_NEXT(m, listq); 2109 } 2110 if (lock != NULL) 2111 rw_wunlock(lock); 2112 rw_runlock(&pvh_global_lock); 2113 PMAP_UNLOCK(pmap); 2114 } 2115 2116 /* 2117 * this code makes some *MAJOR* assumptions: 2118 * 1. Current pmap & pmap exists. 2119 * 2. Not wired. 2120 * 3. Read access. 2121 * 4. No page table pages. 2122 * but is *MUCH* faster than pmap_enter... 
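 *
 * It is typically used by prefaulting code (pmap_enter_object() above
 * and the vm_fault() prefault path), where silently failing to enter
 * a mapping is acceptable.  A rough caller sketch, assuming the page's
 * VM object is already locked (the names here are illustrative only):
 *
 *	pmap_enter_quick(vmspace_pmap(td->td_proc->p_vmspace), va, m,
 *	    entry->protection);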
2123 */ 2124 2125 void 2126 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2127 { 2128 struct rwlock *lock; 2129 2130 lock = NULL; 2131 rw_rlock(&pvh_global_lock); 2132 PMAP_LOCK(pmap); 2133 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2134 if (lock != NULL) 2135 rw_wunlock(lock); 2136 rw_runlock(&pvh_global_lock); 2137 PMAP_UNLOCK(pmap); 2138 } 2139 2140 static vm_page_t 2141 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2142 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2143 { 2144 struct spglist free; 2145 pd_entry_t *l2; 2146 pt_entry_t *l3; 2147 vm_paddr_t pa; 2148 2149 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2150 (m->oflags & VPO_UNMANAGED) != 0, 2151 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2152 rw_assert(&pvh_global_lock, RA_LOCKED); 2153 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2154 2155 /* 2156 * In the case that a page table page is not 2157 * resident, we are creating it here. 2158 */ 2159 if (va < VM_MAXUSER_ADDRESS) { 2160 vm_pindex_t l2pindex; 2161 2162 /* 2163 * Calculate pagetable page index 2164 */ 2165 l2pindex = pmap_l2_pindex(va); 2166 if (mpte && (mpte->pindex == l2pindex)) { 2167 mpte->wire_count++; 2168 } else { 2169 /* 2170 * Get the l2 entry 2171 */ 2172 l2 = pmap_l2(pmap, va); 2173 2174 /* 2175 * If the page table page is mapped, we just increment 2176 * the hold count, and activate it. Otherwise, we 2177 * attempt to allocate a page table page. If this 2178 * attempt fails, we don't retry. Instead, we give up. 2179 */ 2180 if (l2 != NULL && *l2 != 0) { 2181 mpte = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK); 2182 mpte->wire_count++; 2183 } else { 2184 /* 2185 * Pass NULL instead of the PV list lock 2186 * pointer, because we don't intend to sleep. 2187 */ 2188 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2189 if (mpte == NULL) 2190 return (mpte); 2191 } 2192 } 2193 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2194 l3 = &l3[pmap_l3_index(va)]; 2195 } else { 2196 mpte = NULL; 2197 l3 = pmap_l3(kernel_pmap, va); 2198 } 2199 if (l3 == NULL) 2200 panic("pmap_enter_quick_locked: No l3"); 2201 if (*l3) { 2202 if (mpte != NULL) { 2203 mpte->wire_count--; 2204 mpte = NULL; 2205 } 2206 return (mpte); 2207 } 2208 2209 /* 2210 * Enter on the PV list if part of our managed memory. 2211 */ 2212 if ((m->oflags & VPO_UNMANAGED) == 0 && 2213 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2214 if (mpte != NULL) { 2215 SLIST_INIT(&free); 2216 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2217 pmap_invalidate_page(pmap, va); 2218 pmap_free_zero_pages(&free); 2219 } 2220 mpte = NULL; 2221 } 2222 return (mpte); 2223 } 2224 2225 /* 2226 * Increment counters 2227 */ 2228 pmap_resident_count_inc(pmap, 1); 2229 2230 pa = VM_PAGE_TO_PHYS(m) | ATTR_AF | ATTR_IDX(m->md.pv_memattr) | 2231 ATTR_AP(ATTR_AP_RO) | L3_PAGE; 2232 2233 /* 2234 * Now validate mapping with RO protection 2235 */ 2236 if ((m->oflags & VPO_UNMANAGED) == 0) 2237 pa |= ATTR_SW_MANAGED; 2238 pmap_load_store(l3, pa); 2239 PTE_SYNC(l3); 2240 pmap_invalidate_page(pmap, va); 2241 return (mpte); 2242 } 2243 2244 /* 2245 * This code maps large physical mmap regions into the 2246 * processor address space. It is not yet implemented 2247 * here; the function below simply panics.
2248 */ 2249 void 2250 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2251 vm_pindex_t pindex, vm_size_t size) 2252 { 2253 2254 panic("pmap_object_init_pt"); 2255 } 2256 2257 /* 2258 * Clear the wired attribute from the mappings for the specified range of 2259 * addresses in the given pmap. Every valid mapping within that range 2260 * must have the wired attribute set. In contrast, invalid mappings 2261 * cannot have the wired attribute set, so they are ignored. 2262 * 2263 * The wired attribute of the page table entry is not a hardware feature, 2264 * so there is no need to invalidate any TLB entries. 2265 */ 2266 void 2267 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2268 { 2269 vm_offset_t va_next; 2270 pd_entry_t *l1, *l2; 2271 pt_entry_t *l3; 2272 boolean_t pv_lists_locked; 2273 2274 pv_lists_locked = FALSE; 2275 PMAP_LOCK(pmap); 2276 for (; sva < eva; sva = va_next) { 2277 l1 = pmap_l1(pmap, sva); 2278 if (*l1 == 0) { 2279 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2280 if (va_next < sva) 2281 va_next = eva; 2282 continue; 2283 } 2284 2285 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2286 if (va_next < sva) 2287 va_next = eva; 2288 2289 l2 = pmap_l1_to_l2(l1, sva); 2290 if (*l2 == 0) 2291 continue; 2292 2293 if (va_next > eva) 2294 va_next = eva; 2295 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2296 sva += L3_SIZE) { 2297 if (*l3 == 0) 2298 continue; 2299 if ((*l3 & ATTR_SW_WIRED) == 0) 2300 panic("pmap_unwire: l3 %#jx is missing " 2301 "ATTR_SW_WIRED", (uintmax_t)*l3); 2302 2303 /* 2304 * The wired bit (ATTR_SW_WIRED) must be cleared atomically. 2305 * Although the pmap lock synchronizes access to it, another 2306 * processor could be setting the access flag (ATTR_AF) concurrently. 2307 */ 2308 atomic_clear_long(l3, ATTR_SW_WIRED); 2309 pmap->pm_stats.wired_count--; 2310 } 2311 } 2312 if (pv_lists_locked) 2313 rw_runlock(&pvh_global_lock); 2314 PMAP_UNLOCK(pmap); 2315 } 2316 2317 /* 2318 * Copy the range specified by src_addr/len 2319 * from the source map to the range dst_addr/len 2320 * in the destination map. 2321 * 2322 * This routine is only advisory and need not do anything. 2323 */ 2324 2325 void 2326 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2327 vm_offset_t src_addr) 2328 { 2329 } 2330 2331 /* 2332 * pmap_zero_page zeros the specified hardware page by mapping 2333 * the page into KVM and using bzero to clear its contents. 2334 */ 2335 void 2336 pmap_zero_page(vm_page_t m) 2337 { 2338 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2339 2340 pagezero((void *)va); 2341 } 2342 2343 /* 2344 * pmap_zero_page_area zeros the specified hardware page by mapping 2345 * the page into KVM and using bzero to clear its contents. 2346 * 2347 * off and size may not cover an area beyond a single hardware page. 2348 */ 2349 void 2350 pmap_zero_page_area(vm_page_t m, int off, int size) 2351 { 2352 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2353 2354 if (off == 0 && size == PAGE_SIZE) 2355 pagezero((void *)va); 2356 else 2357 bzero((char *)va + off, size); 2358 } 2359 2360 /* 2361 * pmap_zero_page_idle zeros the specified hardware page by mapping 2362 * the page into KVM and using bzero to clear its contents. This 2363 * is intended to be called from the vm_pagezero process only and 2364 * outside of Giant.
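 *
 * On arm64 no transient KVM mapping is actually needed for these
 * zero/copy helpers: physical memory is expected to be covered by the
 * direct map, so PHYS_TO_DMAP() already yields a usable virtual
 * address.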
2365 */ 2366 void 2367 pmap_zero_page_idle(vm_page_t m) 2368 { 2369 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2370 2371 pagezero((void *)va); 2372 } 2373 2374 /* 2375 * pmap_copy_page copies the specified (machine independent) 2376 * page by mapping the page into virtual memory and using 2377 * bcopy to copy the page, one machine dependent page at a 2378 * time. 2379 */ 2380 void 2381 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2382 { 2383 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2384 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2385 2386 pagecopy((void *)src, (void *)dst); 2387 } 2388 2389 int unmapped_buf_allowed = 1; 2390 2391 void 2392 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2393 vm_offset_t b_offset, int xfersize) 2394 { 2395 void *a_cp, *b_cp; 2396 vm_page_t m_a, m_b; 2397 vm_paddr_t p_a, p_b; 2398 vm_offset_t a_pg_offset, b_pg_offset; 2399 int cnt; 2400 2401 while (xfersize > 0) { 2402 a_pg_offset = a_offset & PAGE_MASK; 2403 m_a = ma[a_offset >> PAGE_SHIFT]; 2404 p_a = m_a->phys_addr; 2405 b_pg_offset = b_offset & PAGE_MASK; 2406 m_b = mb[b_offset >> PAGE_SHIFT]; 2407 p_b = m_b->phys_addr; 2408 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2409 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2410 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2411 panic("!DMAP a %lx", p_a); 2412 } else { 2413 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2414 } 2415 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2416 panic("!DMAP b %lx", p_b); 2417 } else { 2418 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2419 } 2420 bcopy(a_cp, b_cp, cnt); 2421 a_offset += cnt; 2422 b_offset += cnt; 2423 xfersize -= cnt; 2424 } 2425 } 2426 2427 /* 2428 * Returns true if the pmap's pv is one of the first 2429 * 16 pvs linked to from this page. This count may 2430 * be changed upwards or downwards in the future; it 2431 * is only necessary that true be returned for a small 2432 * subset of pmaps for proper page aging. 2433 */ 2434 boolean_t 2435 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2436 { 2437 struct rwlock *lock; 2438 pv_entry_t pv; 2439 int loops = 0; 2440 boolean_t rv; 2441 2442 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2443 ("pmap_page_exists_quick: page %p is not managed", m)); 2444 rv = FALSE; 2445 rw_rlock(&pvh_global_lock); 2446 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2447 rw_rlock(lock); 2448 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2449 if (PV_PMAP(pv) == pmap) { 2450 rv = TRUE; 2451 break; 2452 } 2453 loops++; 2454 if (loops >= 16) 2455 break; 2456 } 2457 rw_runlock(lock); 2458 rw_runlock(&pvh_global_lock); 2459 return (rv); 2460 } 2461 2462 /* 2463 * pmap_page_wired_mappings: 2464 * 2465 * Return the number of managed mappings to the given physical page 2466 * that are wired. 
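 *
 * The page's PV list lock protects only the list itself.  Each pmap
 * is locked with PMAP_TRYLOCK() to respect the usual lock order; if
 * that fails, the PV list lock is dropped, the pmap lock is taken,
 * and the scan restarts whenever the list generation (pv_gen) changed
 * in the meantime.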
2467 */ 2468 int 2469 pmap_page_wired_mappings(vm_page_t m) 2470 { 2471 struct rwlock *lock; 2472 pmap_t pmap; 2473 pt_entry_t *l3; 2474 pv_entry_t pv; 2475 int count, md_gen; 2476 2477 if ((m->oflags & VPO_UNMANAGED) != 0) 2478 return (0); 2479 rw_rlock(&pvh_global_lock); 2480 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2481 rw_rlock(lock); 2482 restart: 2483 count = 0; 2484 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2485 pmap = PV_PMAP(pv); 2486 if (!PMAP_TRYLOCK(pmap)) { 2487 md_gen = m->md.pv_gen; 2488 rw_runlock(lock); 2489 PMAP_LOCK(pmap); 2490 rw_rlock(lock); 2491 if (md_gen != m->md.pv_gen) { 2492 PMAP_UNLOCK(pmap); 2493 goto restart; 2494 } 2495 } 2496 l3 = pmap_l3(pmap, pv->pv_va); 2497 if (l3 != NULL && (*l3 & ATTR_SW_WIRED) != 0) 2498 count++; 2499 PMAP_UNLOCK(pmap); 2500 } 2501 rw_runlock(lock); 2502 rw_runlock(&pvh_global_lock); 2503 return (count); 2504 } 2505 2506 /* 2507 * Destroy all managed, non-wired mappings in the given user-space 2508 * pmap. This pmap cannot be active on any processor besides the 2509 * caller. 2510 * 2511 * This function cannot be applied to the kernel pmap. Moreover, it 2512 * is not intended for general use. It is only to be used during 2513 * process termination. Consequently, it can be implemented in ways 2514 * that make it faster than pmap_remove(). First, it can more quickly 2515 * destroy mappings by iterating over the pmap's collection of PV 2516 * entries, rather than searching the page table. Second, it doesn't 2517 * have to test and clear the page table entries atomically, because 2518 * no processor is currently accessing the user address space. In 2519 * particular, a page table entry's dirty bit won't change state once 2520 * this function starts. 2521 */ 2522 void 2523 pmap_remove_pages(pmap_t pmap) 2524 { 2525 pd_entry_t ptepde, *l2; 2526 pt_entry_t *l3, tl3; 2527 struct spglist free; 2528 vm_page_t m; 2529 pv_entry_t pv; 2530 struct pv_chunk *pc, *npc; 2531 struct rwlock *lock; 2532 int64_t bit; 2533 uint64_t inuse, bitmask; 2534 int allfree, field, freed, idx; 2535 vm_paddr_t pa; 2536 2537 lock = NULL; 2538 2539 SLIST_INIT(&free); 2540 rw_rlock(&pvh_global_lock); 2541 PMAP_LOCK(pmap); 2542 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2543 allfree = 1; 2544 freed = 0; 2545 for (field = 0; field < _NPCM; field++) { 2546 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2547 while (inuse != 0) { 2548 bit = ffsl(inuse) - 1; 2549 bitmask = 1UL << bit; 2550 idx = field * 64 + bit; 2551 pv = &pc->pc_pventry[idx]; 2552 inuse &= ~bitmask; 2553 2554 l2 = pmap_l2(pmap, pv->pv_va); 2555 ptepde = pmap_load(l2); 2556 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2557 tl3 = pmap_load(l3); 2558 2559 /* 2560 * We cannot remove wired pages from a process' mapping at this time 2561 */ 2562 if (tl3 & ATTR_SW_WIRED) { 2563 allfree = 0; 2564 continue; 2565 } 2566 2567 pa = tl3 & ~ATTR_MASK; 2568 2569 m = PHYS_TO_VM_PAGE(pa); 2570 KASSERT(m->phys_addr == pa, 2571 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2572 m, (uintmax_t)m->phys_addr, 2573 (uintmax_t)tl3)); 2574 2575 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2576 m < &vm_page_array[vm_page_array_size], 2577 ("pmap_remove_pages: bad l3 %#jx", 2578 (uintmax_t)tl3)); 2579 2580 if (pmap_is_current(pmap) && 2581 pmap_l3_valid_cacheable(pmap_load(l3))) 2582 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2583 pmap_load_clear(l3); 2584 PTE_SYNC(l3); 2585 2586 /* 2587 * Update the vm_page_t clean/reference bits. 
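 *
 * With no hardware dirty bit, a mapping that was writable
 * (AP == RW) is conservatively treated as modified, so the page
 * is dirtied before its PV entry is torn down.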
2588 */ 2589 if ((tl3 & ATTR_AP_RW_BIT) == 2590 ATTR_AP(ATTR_AP_RW)) 2591 vm_page_dirty(m); 2592 2593 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2594 2595 /* Mark free */ 2596 pc->pc_map[field] |= bitmask; 2597 2598 pmap_resident_count_dec(pmap, 1); 2599 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2600 m->md.pv_gen++; 2601 2602 pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); 2603 freed++; 2604 } 2605 } 2606 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2607 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2608 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2609 if (allfree) { 2610 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2611 free_pv_chunk(pc); 2612 } 2613 } 2614 pmap_invalidate_all(pmap); 2615 if (lock != NULL) 2616 rw_wunlock(lock); 2617 rw_runlock(&pvh_global_lock); 2618 PMAP_UNLOCK(pmap); 2619 pmap_free_zero_pages(&free); 2620 } 2621 2622 /* 2623 * This is used to check if a page has been accessed or modified. As we 2624 * don't have a bit to see if it has been modified we have to assume it 2625 * has been if the page is read/write. 2626 */ 2627 static boolean_t 2628 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 2629 { 2630 struct rwlock *lock; 2631 pv_entry_t pv; 2632 pt_entry_t *l3, mask, value; 2633 pmap_t pmap; 2634 int md_gen; 2635 boolean_t rv; 2636 2637 rv = FALSE; 2638 rw_rlock(&pvh_global_lock); 2639 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2640 rw_rlock(lock); 2641 restart: 2642 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2643 pmap = PV_PMAP(pv); 2644 if (!PMAP_TRYLOCK(pmap)) { 2645 md_gen = m->md.pv_gen; 2646 rw_runlock(lock); 2647 PMAP_LOCK(pmap); 2648 rw_rlock(lock); 2649 if (md_gen != m->md.pv_gen) { 2650 PMAP_UNLOCK(pmap); 2651 goto restart; 2652 } 2653 } 2654 l3 = pmap_l3(pmap, pv->pv_va); 2655 mask = 0; 2656 value = 0; 2657 if (modified) { 2658 mask |= ATTR_AP_RW_BIT; 2659 value |= ATTR_AP(ATTR_AP_RW); 2660 } 2661 if (accessed) { 2662 mask |= ATTR_AF | ATTR_DESCR_MASK; 2663 value |= ATTR_AF | L3_PAGE; 2664 } 2665 rv = (pmap_load(l3) & mask) == value; 2666 PMAP_UNLOCK(pmap); 2667 if (rv) 2668 goto out; 2669 } 2670 out: 2671 rw_runlock(lock); 2672 rw_runlock(&pvh_global_lock); 2673 return (rv); 2674 } 2675 2676 /* 2677 * pmap_is_modified: 2678 * 2679 * Return whether or not the specified physical page was modified 2680 * in any physical maps. 2681 */ 2682 boolean_t 2683 pmap_is_modified(vm_page_t m) 2684 { 2685 2686 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2687 ("pmap_is_modified: page %p is not managed", m)); 2688 2689 /* 2690 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2691 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2692 * is clear, no PTEs can have PG_M set. 2693 */ 2694 VM_OBJECT_ASSERT_WLOCKED(m->object); 2695 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2696 return (FALSE); 2697 return (pmap_page_test_mappings(m, FALSE, TRUE)); 2698 } 2699 2700 /* 2701 * pmap_is_prefaultable: 2702 * 2703 * Return whether or not the specified virtual address is eligible 2704 * for prefault. 2705 */ 2706 boolean_t 2707 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2708 { 2709 pt_entry_t *l3; 2710 boolean_t rv; 2711 2712 rv = FALSE; 2713 PMAP_LOCK(pmap); 2714 l3 = pmap_l3(pmap, addr); 2715 if (l3 != NULL && *l3 != 0) { 2716 rv = TRUE; 2717 } 2718 PMAP_UNLOCK(pmap); 2719 return (rv); 2720 } 2721 2722 /* 2723 * pmap_is_referenced: 2724 * 2725 * Return whether or not the specified physical page was referenced 2726 * in any physical maps. 
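 *
 * A mapping counts as a reference when its L3 entry is a valid
 * page descriptor with the access flag (ATTR_AF) set; see
 * pmap_page_test_mappings() above.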
2727 */ 2728 boolean_t 2729 pmap_is_referenced(vm_page_t m) 2730 { 2731 2732 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2733 ("pmap_is_referenced: page %p is not managed", m)); 2734 return (pmap_page_test_mappings(m, TRUE, FALSE)); 2735 } 2736 2737 /* 2738 * Clear the write and modified bits in each of the given page's mappings. 2739 */ 2740 void 2741 pmap_remove_write(vm_page_t m) 2742 { 2743 pmap_t pmap; 2744 struct rwlock *lock; 2745 pv_entry_t pv; 2746 pt_entry_t *l3, oldl3; 2747 int md_gen; 2748 2749 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2750 ("pmap_remove_write: page %p is not managed", m)); 2751 2752 /* 2753 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2754 * set by another thread while the object is locked. Thus, 2755 * if PGA_WRITEABLE is clear, no page table entries need updating. 2756 */ 2757 VM_OBJECT_ASSERT_WLOCKED(m->object); 2758 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2759 return; 2760 rw_rlock(&pvh_global_lock); 2761 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2762 retry_pv_loop: 2763 rw_wlock(lock); 2764 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2765 pmap = PV_PMAP(pv); 2766 if (!PMAP_TRYLOCK(pmap)) { 2767 md_gen = m->md.pv_gen; 2768 rw_wunlock(lock); 2769 PMAP_LOCK(pmap); 2770 rw_wlock(lock); 2771 if (md_gen != m->md.pv_gen) { 2772 PMAP_UNLOCK(pmap); 2773 rw_wunlock(lock); 2774 goto retry_pv_loop; 2775 } 2776 } 2777 l3 = pmap_l3(pmap, pv->pv_va); 2778 retry: 2779 oldl3 = *l3; 2780 if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 2781 if (!atomic_cmpset_long(l3, oldl3, 2782 oldl3 | ATTR_AP(ATTR_AP_RO))) 2783 goto retry; 2784 if ((oldl3 & ATTR_AF) != 0) 2785 vm_page_dirty(m); 2786 pmap_invalidate_page(pmap, pv->pv_va); 2787 } 2788 PMAP_UNLOCK(pmap); 2789 } 2790 rw_wunlock(lock); 2791 vm_page_aflag_clear(m, PGA_WRITEABLE); 2792 rw_runlock(&pvh_global_lock); 2793 } 2794 2795 static __inline boolean_t 2796 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 2797 { 2798 2799 return (FALSE); 2800 } 2801 2802 #define PMAP_TS_REFERENCED_MAX 5 2803 2804 /* 2805 * pmap_ts_referenced: 2806 * 2807 * Return a count of reference bits for a page, clearing those bits. 2808 * It is not necessary for every reference bit to be cleared, but it 2809 * is necessary that 0 only be returned when there are truly no 2810 * reference bits set. 2811 * 2812 * XXX: The exact number of bits to check and clear is a matter that 2813 * should be tested and standardized at some point in the future for 2814 * optimal aging of shared pages. 
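 *
 * Because safe_to_clear_referenced() currently always returns FALSE,
 * the reference "bit" is cleared by removing the unwired mapping
 * outright and letting a later fault re-establish it; wired mappings
 * are only counted, never removed.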
2815 */ 2816 int 2817 pmap_ts_referenced(vm_page_t m) 2818 { 2819 pv_entry_t pv, pvf; 2820 pmap_t pmap; 2821 struct rwlock *lock; 2822 pd_entry_t *l2; 2823 pt_entry_t *l3; 2824 vm_paddr_t pa; 2825 int cleared, md_gen, not_cleared; 2826 struct spglist free; 2827 2828 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2829 ("pmap_ts_referenced: page %p is not managed", m)); 2830 SLIST_INIT(&free); 2831 cleared = 0; 2832 pa = VM_PAGE_TO_PHYS(m); 2833 lock = PHYS_TO_PV_LIST_LOCK(pa); 2834 rw_rlock(&pvh_global_lock); 2835 rw_wlock(lock); 2836 retry: 2837 not_cleared = 0; 2838 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 2839 goto out; 2840 pv = pvf; 2841 do { 2842 if (pvf == NULL) 2843 pvf = pv; 2844 pmap = PV_PMAP(pv); 2845 if (!PMAP_TRYLOCK(pmap)) { 2846 md_gen = m->md.pv_gen; 2847 rw_wunlock(lock); 2848 PMAP_LOCK(pmap); 2849 rw_wlock(lock); 2850 if (md_gen != m->md.pv_gen) { 2851 PMAP_UNLOCK(pmap); 2852 goto retry; 2853 } 2854 } 2855 l2 = pmap_l2(pmap, pv->pv_va); 2856 KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE, 2857 ("pmap_ts_referenced: found an invalid l2 table")); 2858 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2859 if ((*l3 & ATTR_AF) != 0) { 2860 if (safe_to_clear_referenced(pmap, *l3)) { 2861 /* 2862 * TODO: We don't handle the access flag 2863 * at all. We need to be able to set it in 2864 * the exception handler. 2865 */ 2866 panic("TODO: safe_to_clear_referenced\n"); 2867 } else if ((*l3 & ATTR_SW_WIRED) == 0) { 2868 /* 2869 * Wired pages cannot be paged out so 2870 * doing accessed bit emulation for 2871 * them is wasted effort. We do the 2872 * hard work for unwired pages only. 2873 */ 2874 pmap_remove_l3(pmap, l3, pv->pv_va, 2875 *l2, &free, &lock); 2876 pmap_invalidate_page(pmap, pv->pv_va); 2877 cleared++; 2878 if (pvf == pv) 2879 pvf = NULL; 2880 pv = NULL; 2881 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 2882 ("inconsistent pv lock %p %p for page %p", 2883 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 2884 } else 2885 not_cleared++; 2886 } 2887 PMAP_UNLOCK(pmap); 2888 /* Rotate the PV list if it has more than one entry. */ 2889 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 2890 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2891 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2892 m->md.pv_gen++; 2893 } 2894 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 2895 not_cleared < PMAP_TS_REFERENCED_MAX); 2896 out: 2897 rw_wunlock(lock); 2898 rw_runlock(&pvh_global_lock); 2899 pmap_free_zero_pages(&free); 2900 return (cleared + not_cleared); 2901 } 2902 2903 /* 2904 * Apply the given advice to the specified range of addresses within the 2905 * given pmap. Depending on the advice, clear the referenced and/or 2906 * modified flags in each mapping and set the mapped page's dirty field. 2907 */ 2908 void 2909 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 2910 { 2911 } 2912 2913 /* 2914 * Clear the modify bits on the specified physical page. 2915 */ 2916 void 2917 pmap_clear_modify(vm_page_t m) 2918 { 2919 2920 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2921 ("pmap_clear_modify: page %p is not managed", m)); 2922 VM_OBJECT_ASSERT_WLOCKED(m->object); 2923 KASSERT(!vm_page_xbusied(m), 2924 ("pmap_clear_modify: page %p is exclusive busied", m)); 2925 2926 /* 2927 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 2928 * If the object containing the page is locked and the page is not 2929 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
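 *
 * With no hardware dirty bit, clearing the modify state would
 * presumably require write-protecting every writable mapping of the
 * page; that path is not implemented yet, so the PGA_WRITEABLE case
 * below panics.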
2930 */ 2931 if ((m->aflags & PGA_WRITEABLE) == 0) 2932 return; 2933 panic("pmap_clear_modify"); 2934 } 2935 2936 /* 2937 * Sets the memory attribute for the specified page. 2938 */ 2939 void 2940 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 2941 { 2942 2943 panic("pmap_page_set_memattr"); 2944 } 2945 2946 /* 2947 * perform the pmap work for mincore 2948 */ 2949 int 2950 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 2951 { 2952 2953 panic("pmap_mincore"); 2954 } 2955 2956 void 2957 pmap_activate(struct thread *td) 2958 { 2959 pmap_t pmap; 2960 2961 critical_enter(); 2962 pmap = vmspace_pmap(td->td_proc->p_vmspace); 2963 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 2964 __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr)); 2965 critical_exit(); 2966 } 2967 2968 void 2969 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 2970 { 2971 2972 panic("pmap_sync_icache"); 2973 } 2974 2975 /* 2976 * Increase the starting virtual address of the given mapping if a 2977 * different alignment might result in more superpage mappings. 2978 */ 2979 void 2980 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 2981 vm_offset_t *addr, vm_size_t size) 2982 { 2983 } 2984 2985 /** 2986 * Get the kernel virtual address of a set of physical pages. If there are 2987 * physical addresses not covered by the DMAP perform a transient mapping 2988 * that will be removed when calling pmap_unmap_io_transient. 2989 * 2990 * \param page The pages the caller wishes to obtain the virtual 2991 * address on the kernel memory map. 2992 * \param vaddr On return contains the kernel virtual memory address 2993 * of the pages passed in the page parameter. 2994 * \param count Number of pages passed in. 2995 * \param can_fault TRUE if the thread using the mapped pages can take 2996 * page faults, FALSE otherwise. 2997 * 2998 * \returns TRUE if the caller must call pmap_unmap_io_transient when 2999 * finished or FALSE otherwise. 3000 * 3001 */ 3002 boolean_t 3003 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3004 boolean_t can_fault) 3005 { 3006 vm_paddr_t paddr; 3007 boolean_t needs_mapping; 3008 int error, i; 3009 3010 /* 3011 * Allocate any KVA space that we need, this is done in a separate 3012 * loop to prevent calling vmem_alloc while pinned. 3013 */ 3014 needs_mapping = FALSE; 3015 for (i = 0; i < count; i++) { 3016 paddr = VM_PAGE_TO_PHYS(page[i]); 3017 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3018 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3019 M_BESTFIT | M_WAITOK, &vaddr[i]); 3020 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3021 needs_mapping = TRUE; 3022 } else { 3023 vaddr[i] = PHYS_TO_DMAP(paddr); 3024 } 3025 } 3026 3027 /* Exit early if everything is covered by the DMAP */ 3028 if (!needs_mapping) 3029 return (FALSE); 3030 3031 /* 3032 * NB: The sequence of updating a page table followed by accesses 3033 * to the corresponding pages used in the !DMAP case is subject to 3034 * the situation described in the "AMD64 Architecture Programmer's 3035 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special 3036 * Coherency Considerations". Therefore, issuing the INVLPG right 3037 * after modifying the PTE bits is crucial. 
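 *
 * That reference is inherited from the amd64 pmap; the arm64
 * analogue would presumably be a TLB invalidation (e.g. via
 * pmap_invalidate_page()) immediately after the PTE update.  The
 * non-DMAP case is not implemented yet and panics below.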
3038 */ 3039 if (!can_fault) 3040 sched_pin(); 3041 for (i = 0; i < count; i++) { 3042 paddr = VM_PAGE_TO_PHYS(page[i]); 3043 if (paddr >= DMAP_MAX_PHYSADDR) { 3044 panic( 3045 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3046 } 3047 } 3048 3049 return (needs_mapping); 3050 } 3051 3052 void 3053 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3054 boolean_t can_fault) 3055 { 3056 vm_paddr_t paddr; 3057 int i; 3058 3059 if (!can_fault) 3060 sched_unpin(); 3061 for (i = 0; i < count; i++) { 3062 paddr = VM_PAGE_TO_PHYS(page[i]); 3063 if (paddr >= DMAP_MAX_PHYSADDR) { 3064 panic("pmap_unmap_io_transient: TODO: Unmap data"); 3065 } 3066 } 3067 } 3068