/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Portions of this software were developed by Andrew Turner under
 * sponsorship from The FreeBSD Foundation.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NUPDE		(NPDEPG * NPDEPG)
#define	NUSERPGTBLS	(NUPDE + NPDEPG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

/* The list of all the user pmaps */
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;

static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS);

static struct rwlock_padalign pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
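/*
 * Note (illustrative, added for exposition): PHYS_TO_PV_LIST_LOCK() above
 * hashes a physical address onto one of the NPV_LIST_LOCKS (MAXCPU)
 * rwlocks declared here, so pages whose pa_index values collide modulo
 * NPV_LIST_LOCKS share a pv list lock.  CHANGE_PV_LIST_LOCK_TO_PHYS()
 * always releases the currently held lock before taking the lock for the
 * new address, which keeps a caller holding at most one pv list lock at
 * a time and so avoids lock-ordering deadlocks between the pool entries.
 */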
static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot, vm_page_t mpte,
		    struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry)	atomic_swap_64(table, entry)
#define	pmap_set(table, mask)		atomic_set_64(table, mask)
#define	pmap_load_clear(table)		atomic_swap_64(table, 0)
#define	pmap_load(table)		(*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

static __inline void
pagezero(void *p)
{

	bzero(p, PAGE_SIZE);
}

#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

#define	PTE_TO_PHYS(pte)	((pte >> PTE_PPN0_S) * PAGE_SIZE)

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	vm_paddr_t phys;
	pd_entry_t *l2;

	phys = PTE_TO_PHYS(pmap_load(l1));
	l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);

	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL)
		return (NULL);
	if ((pmap_load(l1) & PTE_V) == 0)
		return (NULL);
	if ((pmap_load(l1) & PTE_RX) != 0)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	vm_paddr_t phys;
	pt_entry_t *l3;

	phys = PTE_TO_PHYS(pmap_load(l2));
	l3 = (pt_entry_t *)PHYS_TO_DMAP(phys);

	return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = pmap_l2(pmap, va);
	if (l2 == NULL)
		return (NULL);
	if ((pmap_load(l2) & PTE_V) == 0)
		return (NULL);
	if ((pmap_load(l2) & PTE_RX) != 0)
		return (NULL);

	return (pmap_l2_to_l3(l2, va));
}
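/*
 * Note (illustrative, added for exposition): the walkers above descend
 * the page table through the direct map rather than via any temporary
 * mappings.  Resolving the L3 entry for a va amounts to:
 *
 *	l1 = &pmap->pm_l1[pmap_l1_index(va)];
 *	l2 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l1))) +
 *	    pmap_l2_index(va);
 *	l3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))) +
 *	    pmap_l3_index(va);
 *
 * A PTE_RX bit set at L1 or L2 marks that entry as a leaf (superpage),
 * so pmap_l2() and pmap_l3() return NULL rather than descending further.
 */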
static __inline int
pmap_is_write(pt_entry_t entry)
{

	return (entry & PTE_W);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return (l3 & PTE_V);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	/* TODO */

	return (0);
}

#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/* Checks if the page is dirty. */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return (pte & PTE_D);
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static void
pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
    pt_entry_t entry)
{
	struct pmap *user_pmap;
	pd_entry_t *l1;

	/* Distribute new kernel L1 entry to all the user pmaps */
	if (pmap != kernel_pmap)
		return;

	LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
		l1 = &user_pmap->pm_l1[l1index];
		if (entry)
			pmap_load_store(l1, entry);
		else
			pmap_load_clear(l1);
	}
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & PTE_RX) == 0,
	    ("Invalid bootstrap L1 table"));

	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;
	vm_paddr_t ret;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	/* Check locore has used L2 superpages */
	KASSERT((l2[l2_slot] & PTE_RX) != 0,
	    ("Invalid bootstrap L2 table"));

	/* L2 is superpages */
	ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
	ret += (va & L2_OFFSET);

	return (ret);
}

static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;
	pt_entry_t entry;
	pn_t pn;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	l1 = (pd_entry_t *)kern_l1;
	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);

	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		/* superpages */
		pn = (pa / PAGE_SIZE);
		entry = (PTE_V | PTE_RWX);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(&l1[l1_slot], entry);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
	cpu_tlb_flushID();
}
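/*
 * Note (illustrative, added for exposition): each iteration of the loop
 * above installs a single L1 leaf entry (PTE_V | PTE_RWX), so the DMAP is
 * built entirely from L1 superpages of L1_SIZE bytes each.  With
 * dmap_phys_base being min_pa rounded down to an L1_SIZE boundary, a
 * physical address pa maps to DMAP_MIN_ADDRESS + (pa - dmap_phys_base).
 */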
static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	pt_entry_t entry;
	pd_entry_t *l2;
	vm_paddr_t pa;
	u_int l2_slot;
	pn_t pn;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pn = (pa / PAGE_SIZE);
		entry = (PTE_V);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(&l2[l2_slot], entry);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return (l3pt);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, min_pa, max_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	LIST_INIT(&allpmaps);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
		break;
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialize phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & PTE_V) == 0)
			break;

		/* Check locore used L2 superpages */
		KASSERT((l2[l2_slot] & PTE_RX) != 0,
		    ("Invalid bootstrap L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L2_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);

	/* Create the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L2_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1]) {
			continue;
		}

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa) {
			continue;
		}

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");
}
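/*
 * Note (illustrative, added for exposition): the three invalidation
 * routines below are still marked TODO and ignore their arguments; each
 * simply executes "sfence.vm", the RISC-V supervisor fence (from the
 * pre-1.10 privileged spec) that synchronizes the local hart's
 * address-translation caches with the page tables.  Every call is
 * therefore a full local TLB flush until finer-grained variants are
 * implemented.
 */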
/*
 * Normal, non-SMP, invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2p, l2;
	pt_entry_t *l3p, l3;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Start with the l2 table. We are unable to allocate
	 * pages in the l1 table.
	 */
	l2p = pmap_l2(pmap, va);
	if (l2p != NULL) {
		l2 = pmap_load(l2p);
		if ((l2 & PTE_RX) == 0) {
			l3p = pmap_l2_to_l3(l2p, va);
			if (l3p != NULL) {
				l3 = pmap_load(l3p);
				pa = PTE_TO_PHYS(l3);
				pa |= (va & L3_OFFSET);
			}
		} else {
			/* L2 is superpages */
			pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
			pa |= (va & L2_OFFSET);
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *l3p, l3;
	vm_paddr_t phys;
	vm_paddr_t pa;
	vm_page_t m;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	l3p = pmap_l3(pmap, va);
	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
		if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) {
			phys = PTE_TO_PHYS(l3);
			if (vm_page_pa_tryrelock(pmap, phys, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(phys);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t *l2;
	pt_entry_t *l3;
	vm_paddr_t pa;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		l2 = pmap_l2(kernel_pmap, va);
		if (l2 == NULL)
			panic("pmap_kextract: No l2");
		if ((pmap_load(l2) & PTE_RX) != 0) {
			/* superpages */
			pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
			pa |= (va & L2_OFFSET);
			return (pa);
		}

		l3 = pmap_l2_to_l3(l2, va);
		if (l3 == NULL)
			panic("pmap_kextract: No l3...");
		pa = PTE_TO_PHYS(pmap_load(l3));
		pa |= (va & PAGE_MASK);
	}
	return (pa);
}
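/*
 * Note (illustrative, added for exposition): for a 2MB L2 superpage the
 * physical address is recovered as
 *
 *	pa = (pte >> PTE_PPN1_S) << L2_SHIFT;
 *	pa |= (va & L2_OFFSET);
 *
 * i.e. the PTE supplies the upper physical page number fields and the
 * low L2_SHIFT bits carry over from the virtual address, mirroring what
 * PTE_TO_PHYS() plus (va & L3_OFFSET) does for 4KB pages.
 */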
/***************************************************
 * Low level mapping routines.....
 ***************************************************/

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{
	pt_entry_t entry;
	pt_entry_t *l3;
	vm_offset_t va;
	pn_t pn;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));

		pn = (pa / PAGE_SIZE);
		entry = (PTE_V | PTE_RWX);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l3, entry);

		PTE_SYNC(l3);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = pmap_l3(kernel_pmap, va);
	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));

	if (pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(l3);
	PTE_SYNC(l3);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *l3;
	vm_offset_t va;

	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
		pmap_load_clear(l3);
		PTE_SYNC(l3);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{

	return PHYS_TO_DMAP(start);
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *l3, pa;
	vm_offset_t va;
	vm_page_t m;
	pt_entry_t entry;
	pn_t pn;
	int i;

	va = sva;
	for (i = 0; i < count; i++) {
		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m);
		pn = (pa / PAGE_SIZE);
		l3 = pmap_l3(kernel_pmap, va);

		entry = (PTE_V | PTE_RWX);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l3, entry);

		PTE_SYNC(l3);
		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *l3;
	vm_offset_t va;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));

		if (pmap_l3_valid_cacheable(pmap_load(l3)))
			cpu_dcache_wb_range(va, L3_SIZE);
		pmap_load_clear(l3);
		PTE_SYNC(l3);

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else {
		return (FALSE);
	}
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
	vm_paddr_t phys;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= NUPDE) {
		/* PD page */
		pd_entry_t *l1;
		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
		PTE_SYNC(l1);
	} else {
		/* PTE page */
		pd_entry_t *l2;
		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUPDE) {
		pd_entry_t *l1;
		/* We just released a PT, unhold the matching PD */
		vm_page_t pdpg;

		l1 = pmap_l1(pmap, va);
		phys = PTE_TO_PHYS(pmap_load(l1));
		pdpg = PHYS_TO_VM_PAGE(phys);
		pmap_unwire_l3(pmap, va, pdpg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_paddr_t phys;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));

	phys = PTE_TO_PHYS(ptepde);

	mpte = PHYS_TO_VM_PAGE(phys);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l1 = kernel_pmap->pm_l1;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l1phys;
	vm_page_t l1pt;

	/*
	 * allocate the l1 page
	 */
	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;

	l1phys = VM_PAGE_TO_PHYS(l1pt);
	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);

	if ((l1pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l1);

	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	/* Install kernel pagetables */
	memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);

	/* Add to the list of all user pmaps */
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);

	return (1);
}
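/*
 * Note (illustrative, added for exposition): page table pages are named
 * by pindex.  Indices [0, NUPDE) denote L3 (PTE) pages, one per L2_SIZE
 * chunk of user VA, and indices [NUPDE, NUSERPGTBLS) denote L2 (PD)
 * pages.  _pmap_alloc_l3() below relies on this split: a request with
 * ptepindex >= NUPDE hooks the new page into the L1 table, while a lower
 * index may first recurse with NUPDE + l1index to materialize the
 * missing L2 page.
 */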
/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
	vm_page_t m, /*pdppg, */pdpg;
	pt_entry_t entry;
	vm_paddr_t phys;
	pn_t pn;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (lockp != NULL) {
			RELEASE_PV_LIST_LOCK(lockp);
			PMAP_UNLOCK(pmap);
			rw_runlock(&pvh_global_lock);
			VM_WAIT;
			rw_rlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}

	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= NUPDE) {
		pd_entry_t *l1;
		vm_pindex_t l1index;

		l1index = ptepindex - NUPDE;
		l1 = &pmap->pm_l1[l1index];

		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
		entry = (PTE_V);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l1, entry);
		pmap_distribute_l1(pmap, l1index, entry);

		PTE_SYNC(l1);

	} else {
		vm_pindex_t l1index;
		pd_entry_t *l1, *l2;

		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
		l1 = &pmap->pm_l1[l1index];
		if (pmap_load(l1) == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
			    lockp) == NULL) {
				--m->wire_count;
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			phys = PTE_TO_PHYS(pmap_load(l1));
			pdpg = PHYS_TO_VM_PAGE(phys);
			pdpg->wire_count++;
		}

		phys = PTE_TO_PHYS(pmap_load(l1));
		l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];

		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
		entry = (PTE_V);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l2, entry);

		PTE_SYNC(l2);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *l2;
	vm_paddr_t phys;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_l2_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	l2 = pmap_l2(pmap, va);

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (l2 != NULL && pmap_load(l2) != 0) {
		phys = PTE_TO_PHYS(pmap_load(l2));
		m = PHYS_TO_VM_PAGE(phys);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
		if (m == NULL && lockp != NULL)
			goto retry;
	}
	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
	m->wire_count--;
	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
	vm_page_free_zero(m);

	/* Remove pmap from the allpmaps list */
	LIST_REMOVE(pmap, pm_list);

	/* Remove kernel pagetables */
	bzero(pmap->pm_l1, PAGE_SIZE);
}

#if 0
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "LU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "LU", "Amount of KVM free");
#endif /* 0 */

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_paddr_t paddr;
	vm_page_t nkpg;
	pd_entry_t *l1, *l2;
	pt_entry_t entry;
	pn_t pn;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);

	addr = roundup2(addr, L2_SIZE);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
		if (pmap_load(l1) == 0) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			if ((nkpg->flags & PG_ZERO) == 0)
				pmap_zero_page(nkpg);
			paddr = VM_PAGE_TO_PHYS(nkpg);

			pn = (paddr / PAGE_SIZE);
			entry = (PTE_V);
			entry |= (pn << PTE_PPN0_S);
			pmap_load_store(l1, entry);
			pmap_distribute_l1(kernel_pmap,
			    pmap_l1_index(kernel_vm_end), entry);

			PTE_SYNC(l1);
			continue; /* try again */
		}
		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
		if ((pmap_load(l2) & PTE_A) != 0) {
			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		paddr = VM_PAGE_TO_PHYS(nkpg);

		pn = (paddr / PAGE_SIZE);
		entry = (PTE_V);
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l2, entry);

		PTE_SYNC(l2);
		pmap_invalidate_page(kernel_pmap, kernel_vm_end);

		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}
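/*
 * Note (illustrative, added for exposition) on the CTASSERTs below: a pv
 * chunk fills exactly one 4KB page and holds _NPCPV == 168 pv entries,
 * whose free bits live in the _NPCM == 3 64-bit words of pc_map.
 * 168 = 64 + 64 + 40, which is why PC_FREE0 and PC_FREE1 are all ones
 * while PC_FREE2 keeps only its low 40 bits set (0x000000ffffffffff).
 */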
/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0	0xfffffffffffffffful
#define	PC_FREE1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful

static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };

#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
	"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
	"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
	"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
	"Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
	"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
	"Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
#endif
#endif /* 0 */

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Returns NULL if PV entries were reclaimed from the specified pmap.
 *
 * We do not, however, unmap 2mpages because subsequent accesses will
 * allocate per-page pv entries until repromotion occurs, thereby
 * exacerbating the shortage of free pv entries.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{

	panic("RISCVTODO: reclaim_pv_chunk");
}

/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 64;
	bit = idx % 64;
	pc->pc_map[field] |= 1ul << bit;
	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
	    pc->pc_map[2] != PC_FREE2) {
		/* 98% of the time, pc is already at the head of the list. */
		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		}
		return;
	}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	mtx_lock(&pv_chunks_mutex);
	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
#if 0 /* TODO: For minidump */
	dump_drop_page(m->phys_addr);
#endif
	vm_page_unwire(m, PQ_INACTIVE);
	vm_page_free(m);
}

/*
 * Returns a new PV entry, allocating a new PV chunk from the system when
 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 * returned.
 *
 * The given PV list lock may be released.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 64 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
			    pc->pc_map[2] == 0) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			PV_STAT(atomic_add_long(&pv_entry_count, 1));
			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
			return (pv);
		}
	}
	/* No free items, allocate another chunk */
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED);
	if (m == NULL) {
		if (lockp == NULL) {
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = reclaim_pv_chunk(pmap, lockp);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
#if 0 /* TODO: This is for minidump */
	dump_add_page(m->phys_addr);
#endif
	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
	pc->pc_map[1] = PC_FREE1;
	pc->pc_map[2] = PC_FREE2;
	mtx_lock(&pv_chunks_mutex);
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(atomic_add_long(&pv_entry_count, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
	return (pv);
}
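/*
 * Note (illustrative, added for exposition): free_pv_entry() and
 * get_pv_entry() treat an entry's position in its chunk as an index into
 * the pc_map bitmap: field = idx / 64 selects the word and bit = idx % 64
 * the bit within it, so e.g. idx 100 maps to pc_map[1], bit 36.  Chunks
 * that still have free entries are kept at the head of pm_pvchunk and
 * fully allocated ones are moved to the tail, so the first chunk is
 * always the right place to look.
 */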
/*
 * First find and then remove the pv entry for the specified pmap and virtual
 * address from the specified pv list.  Returns the pv entry if found and NULL
 * otherwise.  This operation can be performed on pv lists for either 4KB or
 * 2MB page mappings.
 */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			pvh->pv_gen++;
			break;
		}
	}
	return (pv);
}

/*
 * First find and then destroy the pv entry for the specified pmap and virtual
 * address.  This operation can be performed on pv lists for either 4KB or 2MB
 * page mappings.
 */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);

	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

/*
 * Conditionally create the PV entry for a 4KB page mapping if the required
 * memory can be allocated without resorting to reclamation.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct rwlock **lockp)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Pass NULL instead of the lock pointer to disable reclamation. */
	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_l3: do the things to unmap a page in a process
 */
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
	pt_entry_t old_l3;
	vm_paddr_t phys;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	old_l3 = pmap_load_clear(l3);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	if (old_l3 & PTE_SW_WIRED)
		pmap->pm_stats.wired_count -= 1;
	pmap_resident_count_dec(pmap, 1);
	if (old_l3 & PTE_SW_MANAGED) {
		phys = PTE_TO_PHYS(old_l3);
		m = PHYS_TO_VM_PAGE(phys);
		if (pmap_page_dirty(old_l3))
			vm_page_dirty(m);
		if (old_l3 & PTE_A)
			vm_page_aflag_set(m, PGA_REFERENCED);
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		pmap_pvh_free(&m->md, pmap, va);
	}

	return (pmap_unuse_l3(pmap, va, l2e, free));
}
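/*
 * Note (illustrative, added for exposition): pmap_remove_l3() above is
 * where a dying 4KB mapping's hardware-maintained bits are reflected
 * back to the vm_page: PTE_D becomes vm_page_dirty() and PTE_A becomes
 * PGA_REFERENCED, after which the pv entry is freed and pmap_unuse_l3()
 * drops the reference on the containing page table page.
 */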
/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct rwlock *lock;
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t l3_pte, *l3;
	struct spglist free;
	int anyvalid;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	anyvalid = 0;
	SLIST_INIT(&free);

	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	lock = NULL;
	for (; sva < eva; sva = va_next) {
		if (pmap->pm_stats.resident_count == 0)
			break;

		l1 = pmap_l1(pmap, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/*
		 * Calculate index for next page table.
		 */
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;

		l3_pte = pmap_load(l2);

		/*
		 * Weed out invalid mappings.
		 */
		if (l3_pte == 0)
			continue;
		if ((pmap_load(l2) & PTE_RX) != 0)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (l3 == NULL)
				panic("l3 == NULL");
			if (pmap_load(l3) == 0) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free,
			    &lock)) {
				sva += L3_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	if (anyvalid)
		pmap_invalidate_all(pmap);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *l3, tl3;
	pd_entry_t *l2, tl2;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	SLIST_INIT(&free);
	rw_wlock(&pvh_global_lock);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap_resident_count_dec(pmap, 1);
		l2 = pmap_l2(pmap, pv->pv_va);
		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
		tl2 = pmap_load(l2);

		KASSERT((tl2 & PTE_RX) == 0,
		    ("pmap_remove_all: found a table when expecting "
		    "a block in %p's pv list", m));

		l3 = pmap_l2_to_l3(l2, pv->pv_va);
		if (pmap_is_current(pmap) &&
		    pmap_l3_valid_cacheable(pmap_load(l3)))
			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
		tl3 = pmap_load_clear(l3);
		PTE_SYNC(l3);
		pmap_invalidate_page(pmap, pv->pv_va);
		if (tl3 & PTE_SW_WIRED)
			pmap->pm_stats.wired_count--;
		if ((tl3 & PTE_A) != 0)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pmap_page_dirty(tl3))
			vm_page_dirty(m);
		pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
	pmap_free_zero_pages(&free);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t *l3p, l3;
	pt_entry_t entry;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
		return;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {

		l1 = pmap_l1(pmap, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;
		if (pmap_load(l2) == 0)
			continue;
		if ((pmap_load(l2) & PTE_RX) != 0)
			continue;

		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
		    sva += L3_SIZE) {
			l3 = pmap_load(l3p);
			if (pmap_l3_valid(l3)) {
				entry = pmap_load(l3p);
				entry &= ~(PTE_W);
				pmap_load_store(l3p, entry);
				PTE_SYNC(l3p);
				/* XXX: Use pmap_invalidate_range */
				pmap_invalidate_page(pmap, va);
			}
		}
	}
	PMAP_UNLOCK(pmap);

	/* TODO: Only invalidate entries we are touching */
	pmap_invalidate_all(pmap);
}
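/*
 * Note (illustrative, added for exposition): pmap_protect() above only
 * ever narrows permissions, and does so solely by clearing PTE_W on each
 * valid L3 entry; read and execute bits are left untouched.  The trailing
 * pmap_invalidate_all() is the conservative whole-TLB flush that the
 * XXX/TODO comments refer to.
 */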
2034 */ 2035 int 2036 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2037 u_int flags, int8_t psind __unused) 2038 { 2039 struct rwlock *lock; 2040 pd_entry_t *l1, *l2; 2041 pt_entry_t new_l3, orig_l3; 2042 pt_entry_t *l3; 2043 pv_entry_t pv; 2044 vm_paddr_t opa, pa, l2_pa, l3_pa; 2045 vm_page_t mpte, om, l2_m, l3_m; 2046 boolean_t nosleep; 2047 pt_entry_t entry; 2048 pn_t l2_pn; 2049 pn_t l3_pn; 2050 pn_t pn; 2051 2052 va = trunc_page(va); 2053 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2054 VM_OBJECT_ASSERT_LOCKED(m->object); 2055 pa = VM_PAGE_TO_PHYS(m); 2056 pn = (pa / PAGE_SIZE); 2057 2058 new_l3 = PTE_V | PTE_R | PTE_X; 2059 if (prot & VM_PROT_WRITE) 2060 new_l3 |= PTE_W; 2061 if ((va >> 63) == 0) 2062 new_l3 |= PTE_U; 2063 2064 new_l3 |= (pn << PTE_PPN0_S); 2065 if ((flags & PMAP_ENTER_WIRED) != 0) 2066 new_l3 |= PTE_SW_WIRED; 2067 2068 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2069 2070 mpte = NULL; 2071 2072 lock = NULL; 2073 rw_rlock(&pvh_global_lock); 2074 PMAP_LOCK(pmap); 2075 2076 if (va < VM_MAXUSER_ADDRESS) { 2077 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2078 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); 2079 if (mpte == NULL && nosleep) { 2080 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2081 if (lock != NULL) 2082 rw_wunlock(lock); 2083 rw_runlock(&pvh_global_lock); 2084 PMAP_UNLOCK(pmap); 2085 return (KERN_RESOURCE_SHORTAGE); 2086 } 2087 l3 = pmap_l3(pmap, va); 2088 } else { 2089 l3 = pmap_l3(pmap, va); 2090 /* TODO: This is not optimal, but should mostly work */ 2091 if (l3 == NULL) { 2092 l2 = pmap_l2(pmap, va); 2093 if (l2 == NULL) { 2094 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2095 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2096 VM_ALLOC_ZERO); 2097 if (l2_m == NULL) 2098 panic("pmap_enter: l2 pte_m == NULL"); 2099 if ((l2_m->flags & PG_ZERO) == 0) 2100 pmap_zero_page(l2_m); 2101 2102 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2103 l2_pn = (l2_pa / PAGE_SIZE); 2104 2105 l1 = pmap_l1(pmap, va); 2106 entry = (PTE_V); 2107 entry |= (l2_pn << PTE_PPN0_S); 2108 pmap_load_store(l1, entry); 2109 pmap_distribute_l1(pmap, pmap_l1_index(va), entry); 2110 PTE_SYNC(l1); 2111 2112 l2 = pmap_l1_to_l2(l1, va); 2113 } 2114 2115 KASSERT(l2 != NULL, 2116 ("No l2 table after allocating one")); 2117 2118 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2119 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 2120 if (l3_m == NULL) 2121 panic("pmap_enter: l3 pte_m == NULL"); 2122 if ((l3_m->flags & PG_ZERO) == 0) 2123 pmap_zero_page(l3_m); 2124 2125 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2126 l3_pn = (l3_pa / PAGE_SIZE); 2127 entry = (PTE_V); 2128 entry |= (l3_pn << PTE_PPN0_S); 2129 pmap_load_store(l2, entry); 2130 PTE_SYNC(l2); 2131 l3 = pmap_l2_to_l3(l2, va); 2132 } 2133 pmap_invalidate_page(pmap, va); 2134 } 2135 2136 om = NULL; 2137 orig_l3 = pmap_load(l3); 2138 opa = PTE_TO_PHYS(orig_l3); 2139 2140 /* 2141 * Is the specified virtual address already mapped? 2142 */ 2143 if (pmap_l3_valid(orig_l3)) { 2144 /* 2145 * Wiring change, just update stats. We don't worry about 2146 * wiring PT pages as they remain resident as long as there 2147 * are valid mappings in them. Hence, if a user page is wired, 2148 * the PT page will be also. 2149 */ 2150 if ((flags & PMAP_ENTER_WIRED) != 0 && 2151 (orig_l3 & PTE_SW_WIRED) == 0) 2152 pmap->pm_stats.wired_count++; 2153 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2154 (orig_l3 & PTE_SW_WIRED) != 0) 2155 pmap->pm_stats.wired_count--; 2156 2157 /* 2158 * Remove the extra PT page reference. 
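		 *
		 * (A sketch of the accounting: pmap_alloc_l3() above
		 * wired the L3 page table page for this va, but the
		 * already-valid mapping holds its own reference to
		 * that page, so the duplicate is dropped here; the
		 * wire_count must stay positive while any mapping in
		 * the page remains, which the KASSERT verifies.)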
2159 */ 2160 if (mpte != NULL) { 2161 mpte->wire_count--; 2162 KASSERT(mpte->wire_count > 0, 2163 ("pmap_enter: missing reference to page table page," 2164 " va: 0x%lx", va)); 2165 } 2166 2167 /* 2168 * Has the physical page changed? 2169 */ 2170 if (opa == pa) { 2171 /* 2172 * No, might be a protection or wiring change. 2173 */ 2174 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2175 new_l3 |= PTE_SW_MANAGED; 2176 if (pmap_is_write(new_l3)) 2177 vm_page_aflag_set(m, PGA_WRITEABLE); 2178 } 2179 goto validate; 2180 } 2181 2182 /* Flush the cache, there might be uncommitted data in it */ 2183 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2184 cpu_dcache_wb_range(va, L3_SIZE); 2185 } else { 2186 /* 2187 * Increment the counters. 2188 */ 2189 if ((new_l3 & PTE_SW_WIRED) != 0) 2190 pmap->pm_stats.wired_count++; 2191 pmap_resident_count_inc(pmap, 1); 2192 } 2193 /* 2194 * Enter on the PV list if part of our managed memory. 2195 */ 2196 if ((m->oflags & VPO_UNMANAGED) == 0) { 2197 new_l3 |= PTE_SW_MANAGED; 2198 pv = get_pv_entry(pmap, &lock); 2199 pv->pv_va = va; 2200 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2201 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2202 m->md.pv_gen++; 2203 if (pmap_is_write(new_l3)) 2204 vm_page_aflag_set(m, PGA_WRITEABLE); 2205 } 2206 2207 /* 2208 * Update the L3 entry. 2209 */ 2210 if (orig_l3 != 0) { 2211 validate: 2212 orig_l3 = pmap_load_store(l3, new_l3); 2213 PTE_SYNC(l3); 2214 opa = PTE_TO_PHYS(orig_l3); 2215 2216 if (opa != pa) { 2217 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2218 om = PHYS_TO_VM_PAGE(opa); 2219 if (pmap_page_dirty(orig_l3)) 2220 vm_page_dirty(om); 2221 if ((orig_l3 & PTE_A) != 0) 2222 vm_page_aflag_set(om, PGA_REFERENCED); 2223 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2224 pmap_pvh_free(&om->md, pmap, va); 2225 } 2226 } else if (pmap_page_dirty(orig_l3)) { 2227 if ((orig_l3 & PTE_SW_MANAGED) != 0) 2228 vm_page_dirty(m); 2229 } 2230 } else { 2231 pmap_load_store(l3, new_l3); 2232 PTE_SYNC(l3); 2233 } 2234 pmap_invalidate_page(pmap, va); 2235 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2236 cpu_icache_sync_range(va, PAGE_SIZE); 2237 2238 if (lock != NULL) 2239 rw_wunlock(lock); 2240 rw_runlock(&pvh_global_lock); 2241 PMAP_UNLOCK(pmap); 2242 return (KERN_SUCCESS); 2243 } 2244 2245 /* 2246 * Maps a sequence of resident pages belonging to the same object. 2247 * The sequence begins with the given page m_start. This page is 2248 * mapped at the given virtual address start. Each subsequent page is 2249 * mapped at a virtual address that is offset from start by the same 2250 * amount as the page is offset from m_start within the object. The 2251 * last page in the sequence is the page with the largest offset from 2252 * m_start that can be mapped at a virtual address less than the given 2253 * virtual address end. Not every virtual page between start and end 2254 * is mapped; only those for which a resident page exists with the 2255 * corresponding offset from m_start are mapped. 
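 *
 * For example (a sketch of the loop below): the resident page with
 * pindex m_start->pindex + diff is entered at
 *
 *	va = start + ptoa(diff);
 *
 * and the walk stops once diff reaches atop(end - start).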
2256  */
2257 void
2258 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2259     vm_page_t m_start, vm_prot_t prot)
2260 {
2261 	struct rwlock *lock;
2262 	vm_offset_t va;
2263 	vm_page_t m, mpte;
2264 	vm_pindex_t diff, psize;
2265 
2266 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2267 
2268 	psize = atop(end - start);
2269 	mpte = NULL;
2270 	m = m_start;
2271 	lock = NULL;
2272 	rw_rlock(&pvh_global_lock);
2273 	PMAP_LOCK(pmap);
2274 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2275 		va = start + ptoa(diff);
2276 		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2277 		m = TAILQ_NEXT(m, listq);
2278 	}
2279 	if (lock != NULL)
2280 		rw_wunlock(lock);
2281 	rw_runlock(&pvh_global_lock);
2282 	PMAP_UNLOCK(pmap);
2283 }
2284 
2285 /*
2286  * This code makes some *MAJOR* assumptions:
2287  * 1. The current pmap and the given pmap exist.
2288  * 2. The mapping is not wired.
2289  * 3. Only read access is required.
2290  * 4. No page table pages.
2291  * In exchange it is *MUCH* faster than pmap_enter...
2292  */
2293 
2294 void
2295 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2296 {
2297 	struct rwlock *lock;
2298 
2299 	lock = NULL;
2300 	rw_rlock(&pvh_global_lock);
2301 	PMAP_LOCK(pmap);
2302 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2303 	if (lock != NULL)
2304 		rw_wunlock(lock);
2305 	rw_runlock(&pvh_global_lock);
2306 	PMAP_UNLOCK(pmap);
2307 }
2308 
2309 static vm_page_t
2310 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2311     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2312 {
2313 	struct spglist free;
2314 	vm_paddr_t phys;
2315 	pd_entry_t *l2;
2316 	pt_entry_t *l3;
2317 	vm_paddr_t pa;
2318 	pt_entry_t entry;
2319 	pn_t pn;
2320 
2321 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2322 	    (m->oflags & VPO_UNMANAGED) != 0,
2323 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2324 	rw_assert(&pvh_global_lock, RA_LOCKED);
2325 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2326 
2327 	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2328 	/*
2329 	 * In the case that a page table page is not
2330 	 * resident, we are creating it here.
2331 	 */
2332 	if (va < VM_MAXUSER_ADDRESS) {
2333 		vm_pindex_t l2pindex;
2334 
2335 		/*
2336 		 * Calculate the pagetable page index.
2337 		 */
2338 		l2pindex = pmap_l2_pindex(va);
2339 		if (mpte && (mpte->pindex == l2pindex)) {
2340 			mpte->wire_count++;
2341 		} else {
2342 			/*
2343 			 * Get the l2 entry.
2344 			 */
2345 			l2 = pmap_l2(pmap, va);
2346 
2347 			/*
2348 			 * If the page table page is mapped, we just increment
2349 			 * the hold count, and activate it.  Otherwise, we
2350 			 * attempt to allocate a page table page.  If this
2351 			 * attempt fails, we don't retry.  Instead, we give up.
2352 			 */
2353 			if (l2 != NULL && pmap_load(l2) != 0) {
2354 				phys = PTE_TO_PHYS(pmap_load(l2));
2355 				mpte = PHYS_TO_VM_PAGE(phys);
2356 				mpte->wire_count++;
2357 			} else {
2358 				/*
2359 				 * Pass NULL instead of the PV list lock
2360 				 * pointer, because we don't intend to sleep.
2361 				 */
2362 				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2363 				if (mpte == NULL)
2364 					return (mpte);
2365 			}
2366 		}
2367 		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2368 		l3 = &l3[pmap_l3_index(va)];
2369 	} else {
2370 		mpte = NULL;
2371 		l3 = pmap_l3(kernel_pmap, va);
2372 	}
2373 	if (l3 == NULL)
2374 		panic("pmap_enter_quick_locked: No l3");
2375 	if (pmap_load(l3) != 0) {
2376 		if (mpte != NULL) {
2377 			mpte->wire_count--;
2378 			mpte = NULL;
2379 		}
2380 		return (mpte);
2381 	}
2382 
2383 	/*
2384 	 * Enter on the PV list if part of our managed memory.
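	 *
	 * (Each PV entry records one (pmap, va) mapping of the page so
	 * that reverse lookups can visit every mapping, in the style of:
	 *
	 *	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next)
	 *		...
	 *
	 * as pmap_remove_all() and pmap_page_test_mappings() do.)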
2385 	 */
2386 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2387 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2388 		if (mpte != NULL) {
2389 			SLIST_INIT(&free);
2390 			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2391 				pmap_invalidate_page(pmap, va);
2392 				pmap_free_zero_pages(&free);
2393 			}
2394 			mpte = NULL;
2395 		}
2396 		return (mpte);
2397 	}
2398 
2399 	/*
2400 	 * Increment counters.
2401 	 */
2402 	pmap_resident_count_inc(pmap, 1);
2403 
2404 	pa = VM_PAGE_TO_PHYS(m);
2405 	pn = (pa / PAGE_SIZE);
2406 
2407 	/* RISCVTODO: check permissions */
2408 	entry = (PTE_V | PTE_RWX);
2409 	entry |= (pn << PTE_PPN0_S);
2410 
2411 	/*
2412 	 * Now validate the mapping (read-only entry is still a RISCVTODO; see above).
2413 	 */
2414 	if ((m->oflags & VPO_UNMANAGED) == 0)
2415 		entry |= PTE_SW_MANAGED;
2416 	pmap_load_store(l3, entry);
2417 
2418 	PTE_SYNC(l3);
2419 	pmap_invalidate_page(pmap, va);
2420 	return (mpte);
2421 }
2422 
2423 /*
2424  * This code maps large physical mmap regions into the
2425  * processor address space.  Note that some shortcuts
2426  * are taken, but the code works.
2427  */
2428 void
2429 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2430     vm_pindex_t pindex, vm_size_t size)
2431 {
2432 
2433 	VM_OBJECT_ASSERT_WLOCKED(object);
2434 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2435 	    ("pmap_object_init_pt: non-device object"));
2436 }
2437 
2438 /*
2439  * Clear the wired attribute from the mappings for the specified range of
2440  * addresses in the given pmap.  Every valid mapping within that range
2441  * must have the wired attribute set.  In contrast, invalid mappings
2442  * cannot have the wired attribute set, so they are ignored.
2443  *
2444  * The wired attribute of the page table entry is not a hardware feature,
2445  * so there is no need to invalidate any TLB entries.
2446  */
2447 void
2448 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2449 {
2450 	vm_offset_t va_next;
2451 	pd_entry_t *l1, *l2;
2452 	pt_entry_t *l3;
2453 	boolean_t pv_lists_locked;
2454 
2455 	pv_lists_locked = FALSE;
2456 	PMAP_LOCK(pmap);
2457 	for (; sva < eva; sva = va_next) {
2458 		l1 = pmap_l1(pmap, sva);
2459 		if (pmap_load(l1) == 0) {
2460 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2461 			if (va_next < sva)
2462 				va_next = eva;
2463 			continue;
2464 		}
2465 
2466 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2467 		if (va_next < sva)
2468 			va_next = eva;
2469 
2470 		l2 = pmap_l1_to_l2(l1, sva);
2471 		if (pmap_load(l2) == 0)
2472 			continue;
2473 
2474 		if (va_next > eva)
2475 			va_next = eva;
2476 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2477 		    sva += L3_SIZE) {
2478 			if (pmap_load(l3) == 0)
2479 				continue;
2480 			if ((pmap_load(l3) & PTE_SW_WIRED) == 0)
2481 				panic("pmap_unwire: l3 %#jx is missing "
2482 				    "PTE_SW_WIRED", (uintmax_t)pmap_load(l3));
2483 
2484 			/*
2485 			 * PTE_SW_WIRED must be cleared atomically.  Although the
2486 			 * pmap lock synchronizes access to it, another processor
2487 			 * could be setting PTE_D and/or PTE_A concurrently.
2488 			 */
2489 			atomic_clear_long(l3, PTE_SW_WIRED);
2490 			pmap->pm_stats.wired_count--;
2491 		}
2492 	}
2493 	if (pv_lists_locked)
2494 		rw_runlock(&pvh_global_lock);
2495 	PMAP_UNLOCK(pmap);
2496 }
2497 
2498 /*
2499  * Copy the range specified by src_addr/len
2500  * from the source map to the range dst_addr/len
2501  * in the destination map.
2502  *
2503  * This routine is only advisory and need not do anything.
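 *
 * (An empty implementation, as below, is therefore legal: the
 * destination pmap simply takes page faults, and vm_fault() calls
 * pmap_enter() to rebuild each mapping on demand.)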
2504 */ 2505 2506 void 2507 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2508 vm_offset_t src_addr) 2509 { 2510 2511 } 2512 2513 /* 2514 * pmap_zero_page zeros the specified hardware page by mapping 2515 * the page into KVM and using bzero to clear its contents. 2516 */ 2517 void 2518 pmap_zero_page(vm_page_t m) 2519 { 2520 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2521 2522 pagezero((void *)va); 2523 } 2524 2525 /* 2526 * pmap_zero_page_area zeros the specified hardware page by mapping 2527 * the page into KVM and using bzero to clear its contents. 2528 * 2529 * off and size may not cover an area beyond a single hardware page. 2530 */ 2531 void 2532 pmap_zero_page_area(vm_page_t m, int off, int size) 2533 { 2534 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2535 2536 if (off == 0 && size == PAGE_SIZE) 2537 pagezero((void *)va); 2538 else 2539 bzero((char *)va + off, size); 2540 } 2541 2542 /* 2543 * pmap_copy_page copies the specified (machine independent) 2544 * page by mapping the page into virtual memory and using 2545 * bcopy to copy the page, one machine dependent page at a 2546 * time. 2547 */ 2548 void 2549 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2550 { 2551 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2552 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2553 2554 pagecopy((void *)src, (void *)dst); 2555 } 2556 2557 int unmapped_buf_allowed = 1; 2558 2559 void 2560 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2561 vm_offset_t b_offset, int xfersize) 2562 { 2563 void *a_cp, *b_cp; 2564 vm_page_t m_a, m_b; 2565 vm_paddr_t p_a, p_b; 2566 vm_offset_t a_pg_offset, b_pg_offset; 2567 int cnt; 2568 2569 while (xfersize > 0) { 2570 a_pg_offset = a_offset & PAGE_MASK; 2571 m_a = ma[a_offset >> PAGE_SHIFT]; 2572 p_a = m_a->phys_addr; 2573 b_pg_offset = b_offset & PAGE_MASK; 2574 m_b = mb[b_offset >> PAGE_SHIFT]; 2575 p_b = m_b->phys_addr; 2576 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2577 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2578 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2579 panic("!DMAP a %lx", p_a); 2580 } else { 2581 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2582 } 2583 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2584 panic("!DMAP b %lx", p_b); 2585 } else { 2586 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2587 } 2588 bcopy(a_cp, b_cp, cnt); 2589 a_offset += cnt; 2590 b_offset += cnt; 2591 xfersize -= cnt; 2592 } 2593 } 2594 2595 vm_offset_t 2596 pmap_quick_enter_page(vm_page_t m) 2597 { 2598 2599 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2600 } 2601 2602 void 2603 pmap_quick_remove_page(vm_offset_t addr) 2604 { 2605 } 2606 2607 /* 2608 * Returns true if the pmap's pv is one of the first 2609 * 16 pvs linked to from this page. This count may 2610 * be changed upwards or downwards in the future; it 2611 * is only necessary that true be returned for a small 2612 * subset of pmaps for proper page aging. 
2613 */ 2614 boolean_t 2615 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2616 { 2617 struct rwlock *lock; 2618 pv_entry_t pv; 2619 int loops = 0; 2620 boolean_t rv; 2621 2622 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2623 ("pmap_page_exists_quick: page %p is not managed", m)); 2624 rv = FALSE; 2625 rw_rlock(&pvh_global_lock); 2626 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2627 rw_rlock(lock); 2628 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2629 if (PV_PMAP(pv) == pmap) { 2630 rv = TRUE; 2631 break; 2632 } 2633 loops++; 2634 if (loops >= 16) 2635 break; 2636 } 2637 rw_runlock(lock); 2638 rw_runlock(&pvh_global_lock); 2639 return (rv); 2640 } 2641 2642 /* 2643 * pmap_page_wired_mappings: 2644 * 2645 * Return the number of managed mappings to the given physical page 2646 * that are wired. 2647 */ 2648 int 2649 pmap_page_wired_mappings(vm_page_t m) 2650 { 2651 struct rwlock *lock; 2652 pmap_t pmap; 2653 pt_entry_t *l3; 2654 pv_entry_t pv; 2655 int count, md_gen; 2656 2657 if ((m->oflags & VPO_UNMANAGED) != 0) 2658 return (0); 2659 rw_rlock(&pvh_global_lock); 2660 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2661 rw_rlock(lock); 2662 restart: 2663 count = 0; 2664 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2665 pmap = PV_PMAP(pv); 2666 if (!PMAP_TRYLOCK(pmap)) { 2667 md_gen = m->md.pv_gen; 2668 rw_runlock(lock); 2669 PMAP_LOCK(pmap); 2670 rw_rlock(lock); 2671 if (md_gen != m->md.pv_gen) { 2672 PMAP_UNLOCK(pmap); 2673 goto restart; 2674 } 2675 } 2676 l3 = pmap_l3(pmap, pv->pv_va); 2677 if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) 2678 count++; 2679 PMAP_UNLOCK(pmap); 2680 } 2681 rw_runlock(lock); 2682 rw_runlock(&pvh_global_lock); 2683 return (count); 2684 } 2685 2686 /* 2687 * Destroy all managed, non-wired mappings in the given user-space 2688 * pmap. This pmap cannot be active on any processor besides the 2689 * caller. 2690 * 2691 * This function cannot be applied to the kernel pmap. Moreover, it 2692 * is not intended for general use. It is only to be used during 2693 * process termination. Consequently, it can be implemented in ways 2694 * that make it faster than pmap_remove(). First, it can more quickly 2695 * destroy mappings by iterating over the pmap's collection of PV 2696 * entries, rather than searching the page table. Second, it doesn't 2697 * have to test and clear the page table entries atomically, because 2698 * no processor is currently accessing the user address space. In 2699 * particular, a page table entry's dirty bit won't change state once 2700 * this function starts. 
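 *
 * A sketch of the scan performed below: each pv_chunk keeps a bitmap
 * of free slots, so the allocated entries are enumerated by inverting
 * it:
 *
 *	inuse = ~pc->pc_map[field] & pc_freemask[field];
 *	bit = ffsl(inuse) - 1;		(lowest allocated slot)
 *	idx = field * 64 + bit;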
2701  */
2702 void
2703 pmap_remove_pages(pmap_t pmap)
2704 {
2705 	pd_entry_t ptepde, *l2;
2706 	pt_entry_t *l3, tl3;
2707 	struct spglist free;
2708 	vm_page_t m;
2709 	pv_entry_t pv;
2710 	struct pv_chunk *pc, *npc;
2711 	struct rwlock *lock;
2712 	int64_t bit;
2713 	uint64_t inuse, bitmask;
2714 	int allfree, field, freed, idx;
2715 	vm_paddr_t pa;
2716 
2717 	lock = NULL;
2718 
2719 	SLIST_INIT(&free);
2720 	rw_rlock(&pvh_global_lock);
2721 	PMAP_LOCK(pmap);
2722 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2723 		allfree = 1;
2724 		freed = 0;
2725 		for (field = 0; field < _NPCM; field++) {
2726 			inuse = ~pc->pc_map[field] & pc_freemask[field];
2727 			while (inuse != 0) {
2728 				bit = ffsl(inuse) - 1;
2729 				bitmask = 1UL << bit;
2730 				idx = field * 64 + bit;
2731 				pv = &pc->pc_pventry[idx];
2732 				inuse &= ~bitmask;
2733 
2734 				l2 = pmap_l2(pmap, pv->pv_va);
2735 				ptepde = pmap_load(l2);
2736 				l3 = pmap_l2_to_l3(l2, pv->pv_va);
2737 				tl3 = pmap_load(l3);
2738 
2739 				/*
2740 				 * We cannot remove wired pages from a process' mapping at this time.
2741 				 */
2742 				if (tl3 & PTE_SW_WIRED) {
2743 					allfree = 0;
2744 					continue;
2745 				}
2746 
2747 				pa = PTE_TO_PHYS(tl3);
2748 				m = PHYS_TO_VM_PAGE(pa);
2749 				KASSERT(m->phys_addr == pa,
2750 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2751 				    m, (uintmax_t)m->phys_addr,
2752 				    (uintmax_t)tl3));
2753 
2754 				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2755 				    m < &vm_page_array[vm_page_array_size],
2756 				    ("pmap_remove_pages: bad l3 %#jx",
2757 				    (uintmax_t)tl3));
2758 
2759 				if (pmap_is_current(pmap) &&
2760 				    pmap_l3_valid_cacheable(pmap_load(l3)))
2761 					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2762 				pmap_load_clear(l3);
2763 				PTE_SYNC(l3);
2764 				pmap_invalidate_page(pmap, pv->pv_va);
2765 
2766 				/*
2767 				 * Update the vm_page_t clean/reference bits.
2768 				 */
2769 				if (pmap_page_dirty(tl3))
2770 					vm_page_dirty(m);
2771 
2772 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2773 
2774 				/* Mark free */
2775 				pc->pc_map[field] |= bitmask;
2776 
2777 				pmap_resident_count_dec(pmap, 1);
2778 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2779 				m->md.pv_gen++;
2780 
2781 				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
2782 				freed++;
2783 			}
2784 		}
2785 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2786 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2787 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2788 		if (allfree) {
2789 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2790 			free_pv_chunk(pc);
2791 		}
2792 	}
2793 	pmap_invalidate_all(pmap);
2794 	if (lock != NULL)
2795 		rw_wunlock(lock);
2796 	rw_runlock(&pvh_global_lock);
2797 	PMAP_UNLOCK(pmap);
2798 	pmap_free_zero_pages(&free);
2799 }
2800 
2801 /*
2802  * This is used to check if a page has been accessed or modified.  A page
2803  * is considered modified if the dirty (PTE_D) bit is set in its PTE and
2804  * referenced if the accessed (PTE_A) bit is set.
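 *
 * As a sketch of the test constructed below, checking "modified"
 * reduces to:
 *
 *	mask = value = PTE_D;
 *	rv = (pmap_load(l3) & mask) == value;
 *
 * evaluated for each mapping on the page's pv list until one matches.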
2805  */
2806 static boolean_t
2807 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
2808 {
2809 	struct rwlock *lock;
2810 	pv_entry_t pv;
2811 	pt_entry_t *l3, mask, value;
2812 	pmap_t pmap;
2813 	int md_gen;
2814 	boolean_t rv;
2815 
2816 	rv = FALSE;
2817 	rw_rlock(&pvh_global_lock);
2818 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2819 	rw_rlock(lock);
2820 restart:
2821 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2822 		pmap = PV_PMAP(pv);
2823 		if (!PMAP_TRYLOCK(pmap)) {
2824 			md_gen = m->md.pv_gen;
2825 			rw_runlock(lock);
2826 			PMAP_LOCK(pmap);
2827 			rw_rlock(lock);
2828 			if (md_gen != m->md.pv_gen) {
2829 				PMAP_UNLOCK(pmap);
2830 				goto restart;
2831 			}
2832 		}
2833 		l3 = pmap_l3(pmap, pv->pv_va);
2834 		mask = 0;
2835 		value = 0;
2836 		if (modified) {
2837 			mask |= PTE_D;
2838 			value |= PTE_D;
2839 		}
2840 		if (accessed) {
2841 			mask |= PTE_A;
2842 			value |= PTE_A;
2843 		}
2844 
2856 		rv = (pmap_load(l3) & mask) == value;
2857 		PMAP_UNLOCK(pmap);
2858 		if (rv)
2859 			goto out;
2860 	}
2861 out:
2862 	rw_runlock(lock);
2863 	rw_runlock(&pvh_global_lock);
2864 	return (rv);
2865 }
2866 
2867 /*
2868  * pmap_is_modified:
2869  *
2870  *	Return whether or not the specified physical page was modified
2871  *	in any physical maps.
2872  */
2873 boolean_t
2874 pmap_is_modified(vm_page_t m)
2875 {
2876 
2877 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2878 	    ("pmap_is_modified: page %p is not managed", m));
2879 
2880 	/*
2881 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2882 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2883 	 * is clear, no PTEs can have PTE_D set.
2884 	 */
2885 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2886 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2887 		return (FALSE);
2888 	return (pmap_page_test_mappings(m, FALSE, TRUE));
2889 }
2890 
2891 /*
2892  * pmap_is_prefaultable:
2893  *
2894  *	Return whether or not the specified virtual address is eligible
2895  *	for prefault.
2896  */
2897 boolean_t
2898 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2899 {
2900 	pt_entry_t *l3;
2901 	boolean_t rv;
2902 
2903 	rv = FALSE;
2904 	PMAP_LOCK(pmap);
2905 	l3 = pmap_l3(pmap, addr);
2906 	if (l3 != NULL && pmap_load(l3) != 0) {
2907 		rv = TRUE;
2908 	}
2909 	PMAP_UNLOCK(pmap);
2910 	return (rv);
2911 }
2912 
2913 /*
2914  * pmap_is_referenced:
2915  *
2916  *	Return whether or not the specified physical page was referenced
2917  *	in any physical maps.
2918  */
2919 boolean_t
2920 pmap_is_referenced(vm_page_t m)
2921 {
2922 
2923 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2924 	    ("pmap_is_referenced: page %p is not managed", m));
2925 	return (pmap_page_test_mappings(m, TRUE, FALSE));
2926 }
2927 
2928 /*
2929  * Clear the write and modified bits in each of the given page's mappings.
2930  */
2931 void
2932 pmap_remove_write(vm_page_t m)
2933 {
2934 	pmap_t pmap;
2935 	struct rwlock *lock;
2936 	pv_entry_t pv;
2937 	pt_entry_t *l3, oldl3;
2938 	pt_entry_t newl3;
2939 	int md_gen;
2940 
2941 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2942 	    ("pmap_remove_write: page %p is not managed", m));
2943 
2944 	/*
2945 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2946 	 * set by another thread while the object is locked.  Thus,
2947 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
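	 *
	 * A sketch of the write-protect step performed below; the PTE
	 * is updated with a compare-and-set loop so that concurrent
	 * hardware updates of PTE_A/PTE_D are not lost:
	 *
	 *	do {
	 *		oldl3 = pmap_load(l3);
	 *	} while (pmap_is_write(oldl3) &&
	 *	    !atomic_cmpset_long(l3, oldl3, oldl3 & ~PTE_W));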
2948 	 */
2949 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2950 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2951 		return;
2952 	rw_rlock(&pvh_global_lock);
2953 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2954 retry_pv_loop:
2955 	rw_wlock(lock);
2956 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2957 		pmap = PV_PMAP(pv);
2958 		if (!PMAP_TRYLOCK(pmap)) {
2959 			md_gen = m->md.pv_gen;
2960 			rw_wunlock(lock);
2961 			PMAP_LOCK(pmap);
2962 			rw_wlock(lock);
2963 			if (md_gen != m->md.pv_gen) {
2964 				PMAP_UNLOCK(pmap);
2965 				rw_wunlock(lock);
2966 				goto retry_pv_loop;
2967 			}
2968 		}
2969 		l3 = pmap_l3(pmap, pv->pv_va);
2970 retry:
2971 		oldl3 = pmap_load(l3);
2972 
2973 		if (pmap_is_write(oldl3)) {
2974 			newl3 = oldl3 & ~(PTE_W);
2975 			if (!atomic_cmpset_long(l3, oldl3, newl3))
2976 				goto retry;
2978 			if (pmap_page_dirty(oldl3))
2979 				vm_page_dirty(m);
2980 			pmap_invalidate_page(pmap, pv->pv_va);
2981 		}
2982 		PMAP_UNLOCK(pmap);
2983 	}
2984 	rw_wunlock(lock);
2985 	vm_page_aflag_clear(m, PGA_WRITEABLE);
2986 	rw_runlock(&pvh_global_lock);
2987 }
2988 
2989 static __inline boolean_t
2990 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
2991 {
2992 
2993 	return (FALSE);
2994 }
2995 
2996 /*
2997  * pmap_ts_referenced:
2998  *
2999  *	Return a count of reference bits for a page, clearing those bits.
3000  *	It is not necessary for every reference bit to be cleared, but it
3001  *	is necessary that 0 only be returned when there are truly no
3002  *	reference bits set.
3003  *
3004  *	As an optimization, update the page's dirty field if a modified bit is
3005  *	found while counting reference bits.  This opportunistic update can be
3006  *	performed at low cost and can eliminate the need for some future calls
3007  *	to pmap_is_modified().  However, since this function stops after
3008  *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
3009  *	dirty pages.  Those dirty pages will only be detected by a future call
3010  *	to pmap_is_modified().
3011  */
3012 int
3013 pmap_ts_referenced(vm_page_t m)
3014 {
3015 	pv_entry_t pv, pvf;
3016 	pmap_t pmap;
3017 	struct rwlock *lock;
3018 	pd_entry_t *l2;
3019 	pt_entry_t *l3, old_l3;
3020 	vm_paddr_t pa;
3021 	int cleared, md_gen, not_cleared;
3022 	struct spglist free;
3023 
3024 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3025 	    ("pmap_ts_referenced: page %p is not managed", m));
3026 	SLIST_INIT(&free);
3027 	cleared = 0;
3028 	pa = VM_PAGE_TO_PHYS(m);
3029 	lock = PHYS_TO_PV_LIST_LOCK(pa);
3030 	rw_rlock(&pvh_global_lock);
3031 	rw_wlock(lock);
3032 retry:
3033 	not_cleared = 0;
3034 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
3035 		goto out;
3036 	pv = pvf;
3037 	do {
3038 		if (pvf == NULL)
3039 			pvf = pv;
3040 		pmap = PV_PMAP(pv);
3041 		if (!PMAP_TRYLOCK(pmap)) {
3042 			md_gen = m->md.pv_gen;
3043 			rw_wunlock(lock);
3044 			PMAP_LOCK(pmap);
3045 			rw_wlock(lock);
3046 			if (md_gen != m->md.pv_gen) {
3047 				PMAP_UNLOCK(pmap);
3048 				goto retry;
3049 			}
3050 		}
3051 		l2 = pmap_l2(pmap, pv->pv_va);
3052 
3053 		KASSERT((pmap_load(l2) & PTE_RX) == 0,
3054 		    ("pmap_ts_referenced: found an l2 leaf when expecting a table"));
3055 
3056 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
3057 		old_l3 = pmap_load(l3);
3058 		if (pmap_page_dirty(old_l3))
3059 			vm_page_dirty(m);
3060 		if ((old_l3 & PTE_A) != 0) {
3061 			if (safe_to_clear_referenced(pmap, old_l3)) {
3062 				/*
3063 				 * TODO: We don't handle the access flag
3064 				 * at all.  We need to be able to set it in
3065 				 * the exception handler.
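				 *
				 * (Until that is implemented, the unwired
				 * case below emulates the accessed bit by
				 * removing the mapping outright; the next
				 * access faults and re-enters the page,
				 * which acts as the "referenced" event.)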
3066 */ 3067 panic("RISCVTODO: safe_to_clear_referenced\n"); 3068 } else if ((old_l3 & PTE_SW_WIRED) == 0) { 3069 /* 3070 * Wired pages cannot be paged out so 3071 * doing accessed bit emulation for 3072 * them is wasted effort. We do the 3073 * hard work for unwired pages only. 3074 */ 3075 pmap_remove_l3(pmap, l3, pv->pv_va, 3076 pmap_load(l2), &free, &lock); 3077 pmap_invalidate_page(pmap, pv->pv_va); 3078 cleared++; 3079 if (pvf == pv) 3080 pvf = NULL; 3081 pv = NULL; 3082 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 3083 ("inconsistent pv lock %p %p for page %p", 3084 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 3085 } else 3086 not_cleared++; 3087 } 3088 PMAP_UNLOCK(pmap); 3089 /* Rotate the PV list if it has more than one entry. */ 3090 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 3091 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 3092 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3093 m->md.pv_gen++; 3094 } 3095 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 3096 not_cleared < PMAP_TS_REFERENCED_MAX); 3097 out: 3098 rw_wunlock(lock); 3099 rw_runlock(&pvh_global_lock); 3100 pmap_free_zero_pages(&free); 3101 return (cleared + not_cleared); 3102 } 3103 3104 /* 3105 * Apply the given advice to the specified range of addresses within the 3106 * given pmap. Depending on the advice, clear the referenced and/or 3107 * modified flags in each mapping and set the mapped page's dirty field. 3108 */ 3109 void 3110 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 3111 { 3112 } 3113 3114 /* 3115 * Clear the modify bits on the specified physical page. 3116 */ 3117 void 3118 pmap_clear_modify(vm_page_t m) 3119 { 3120 3121 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3122 ("pmap_clear_modify: page %p is not managed", m)); 3123 VM_OBJECT_ASSERT_WLOCKED(m->object); 3124 KASSERT(!vm_page_xbusied(m), 3125 ("pmap_clear_modify: page %p is exclusive busied", m)); 3126 3127 /* 3128 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 3129 * If the object containing the page is locked and the page is not 3130 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 3131 */ 3132 if ((m->aflags & PGA_WRITEABLE) == 0) 3133 return; 3134 3135 /* RISCVTODO: We lack support for tracking if a page is modified */ 3136 } 3137 3138 void * 3139 pmap_mapbios(vm_paddr_t pa, vm_size_t size) 3140 { 3141 3142 return ((void *)PHYS_TO_DMAP(pa)); 3143 } 3144 3145 void 3146 pmap_unmapbios(vm_paddr_t pa, vm_size_t size) 3147 { 3148 } 3149 3150 /* 3151 * Sets the memory attribute for the specified page. 3152 */ 3153 void 3154 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3155 { 3156 3157 m->md.pv_memattr = ma; 3158 3159 /* 3160 * RISCVTODO: Implement the below (from the amd64 pmap) 3161 * If "m" is a normal page, update its direct mapping. This update 3162 * can be relied upon to perform any cache operations that are 3163 * required for data coherence. 
3164 */ 3165 if ((m->flags & PG_FICTITIOUS) == 0 && 3166 PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) 3167 panic("RISCVTODO: pmap_page_set_memattr"); 3168 } 3169 3170 /* 3171 * perform the pmap work for mincore 3172 */ 3173 int 3174 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3175 { 3176 3177 panic("RISCVTODO: pmap_mincore"); 3178 } 3179 3180 void 3181 pmap_activate(struct thread *td) 3182 { 3183 pmap_t pmap; 3184 3185 critical_enter(); 3186 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3187 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 3188 3189 __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr >> PAGE_SHIFT)); 3190 3191 pmap_invalidate_all(pmap); 3192 critical_exit(); 3193 } 3194 3195 void 3196 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3197 { 3198 3199 panic("RISCVTODO: pmap_sync_icache"); 3200 } 3201 3202 /* 3203 * Increase the starting virtual address of the given mapping if a 3204 * different alignment might result in more superpage mappings. 3205 */ 3206 void 3207 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3208 vm_offset_t *addr, vm_size_t size) 3209 { 3210 } 3211 3212 /** 3213 * Get the kernel virtual address of a set of physical pages. If there are 3214 * physical addresses not covered by the DMAP perform a transient mapping 3215 * that will be removed when calling pmap_unmap_io_transient. 3216 * 3217 * \param page The pages the caller wishes to obtain the virtual 3218 * address on the kernel memory map. 3219 * \param vaddr On return contains the kernel virtual memory address 3220 * of the pages passed in the page parameter. 3221 * \param count Number of pages passed in. 3222 * \param can_fault TRUE if the thread using the mapped pages can take 3223 * page faults, FALSE otherwise. 3224 * 3225 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3226 * finished or FALSE otherwise. 3227 * 3228 */ 3229 boolean_t 3230 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3231 boolean_t can_fault) 3232 { 3233 vm_paddr_t paddr; 3234 boolean_t needs_mapping; 3235 int error, i; 3236 3237 /* 3238 * Allocate any KVA space that we need, this is done in a separate 3239 * loop to prevent calling vmem_alloc while pinned. 3240 */ 3241 needs_mapping = FALSE; 3242 for (i = 0; i < count; i++) { 3243 paddr = VM_PAGE_TO_PHYS(page[i]); 3244 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3245 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3246 M_BESTFIT | M_WAITOK, &vaddr[i]); 3247 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3248 needs_mapping = TRUE; 3249 } else { 3250 vaddr[i] = PHYS_TO_DMAP(paddr); 3251 } 3252 } 3253 3254 /* Exit early if everything is covered by the DMAP */ 3255 if (!needs_mapping) 3256 return (FALSE); 3257 3258 if (!can_fault) 3259 sched_pin(); 3260 for (i = 0; i < count; i++) { 3261 paddr = VM_PAGE_TO_PHYS(page[i]); 3262 if (paddr >= DMAP_MAX_PHYSADDR) { 3263 panic( 3264 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3265 } 3266 } 3267 3268 return (needs_mapping); 3269 } 3270 3271 void 3272 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3273 boolean_t can_fault) 3274 { 3275 vm_paddr_t paddr; 3276 int i; 3277 3278 if (!can_fault) 3279 sched_unpin(); 3280 for (i = 0; i < count; i++) { 3281 paddr = VM_PAGE_TO_PHYS(page[i]); 3282 if (paddr >= DMAP_MAX_PHYSADDR) { 3283 panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); 3284 } 3285 } 3286 } 3287