1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1991 Regents of the University of California. 5 * All rights reserved. 6 * Copyright (c) 1994 John S. Dyson 7 * All rights reserved. 8 * Copyright (c) 1994 David Greenman 9 * All rights reserved. 10 * Copyright (c) 2003 Peter Wemm 11 * All rights reserved. 12 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 13 * All rights reserved. 14 * Copyright (c) 2014 Andrew Turner 15 * All rights reserved. 16 * Copyright (c) 2014 The FreeBSD Foundation 17 * All rights reserved. 18 * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com> 19 * All rights reserved. 20 * 21 * This code is derived from software contributed to Berkeley by 22 * the Systems Programming Group of the University of Utah Computer 23 * Science Department and William Jolitz of UUNET Technologies Inc. 24 * 25 * Portions of this software were developed by Andrew Turner under 26 * sponsorship from The FreeBSD Foundation. 27 * 28 * Portions of this software were developed by SRI International and the 29 * University of Cambridge Computer Laboratory under DARPA/AFRL contract 30 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. 31 * 32 * Portions of this software were developed by the University of Cambridge 33 * Computer Laboratory as part of the CTSRD Project, with support from the 34 * UK Higher Education Innovation Fund (HEIF). 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 65 */ 66 /*- 67 * Copyright (c) 2003 Networks Associates Technology, Inc. 68 * All rights reserved. 69 * 70 * This software was developed for the FreeBSD Project by Jake Burkholder, 71 * Safeport Network Services, and Network Associates Laboratories, the 72 * Security Research Division of Network Associates, Inc. under 73 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 74 * CHATS research program. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 85 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 86 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 87 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 88 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 89 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 90 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 91 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 92 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 93 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 94 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 95 * SUCH DAMAGE. 96 */ 97 98 #include <sys/cdefs.h> 99 __FBSDID("$FreeBSD$"); 100 101 /* 102 * Manages physical address maps. 103 * 104 * Since the information managed by this module is 105 * also stored by the logical address mapping module, 106 * this module may throw away valid virtual-to-physical 107 * mappings at almost any time. However, invalidations 108 * of virtual-to-physical mappings must be done as 109 * requested. 110 * 111 * In order to cope with hardware architectures which 112 * make virtual-to-physical map invalidates expensive, 113 * this module may delay invalidate or reduced protection 114 * operations until such time as they are actually 115 * necessary. This module is given full information as 116 * to which processors are currently using which maps, 117 * and to when physical maps must be made correct. 118 */ 119 120 #include <sys/param.h> 121 #include <sys/bus.h> 122 #include <sys/systm.h> 123 #include <sys/kernel.h> 124 #include <sys/ktr.h> 125 #include <sys/lock.h> 126 #include <sys/malloc.h> 127 #include <sys/mman.h> 128 #include <sys/msgbuf.h> 129 #include <sys/mutex.h> 130 #include <sys/proc.h> 131 #include <sys/rwlock.h> 132 #include <sys/sx.h> 133 #include <sys/vmem.h> 134 #include <sys/vmmeter.h> 135 #include <sys/sched.h> 136 #include <sys/sysctl.h> 137 #include <sys/smp.h> 138 139 #include <vm/vm.h> 140 #include <vm/vm_param.h> 141 #include <vm/vm_kern.h> 142 #include <vm/vm_page.h> 143 #include <vm/vm_map.h> 144 #include <vm/vm_object.h> 145 #include <vm/vm_extern.h> 146 #include <vm/vm_pageout.h> 147 #include <vm/vm_pager.h> 148 #include <vm/vm_radix.h> 149 #include <vm/vm_reserv.h> 150 #include <vm/uma.h> 151 152 #include <machine/machdep.h> 153 #include <machine/md_var.h> 154 #include <machine/pcb.h> 155 156 #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) 157 #define NUPDE (NPDEPG * NPDEPG) 158 #define NUSERPGTBLS (NUPDE + NPDEPG) 159 160 #if !defined(DIAGNOSTIC) 161 #ifdef __GNUC_GNU_INLINE__ 162 #define PMAP_INLINE __attribute__((__gnu_inline__)) inline 163 #else 164 #define PMAP_INLINE extern inline 165 #endif 166 #else 167 #define PMAP_INLINE 168 #endif 169 170 #ifdef PV_STATS 171 #define PV_STAT(x) do { x ; } while (0) 172 #else 173 #define PV_STAT(x) do { } while (0) 174 #endif 175 176 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 177 178 #define NPV_LIST_LOCKS MAXCPU 179 180 #define PHYS_TO_PV_LIST_LOCK(pa) \ 181 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 182 183 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 184 struct rwlock **_lockp = (lockp); \ 185 struct rwlock *_new_lock; \ 186 \ 187 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 188 if (_new_lock != *_lockp) { \ 189 if (*_lockp != NULL) \ 190 rw_wunlock(*_lockp); \ 191 *_lockp = _new_lock; \ 192 rw_wlock(*_lockp); \ 193 } \ 194 } while (0) 195 196 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 197 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 198 199 #define RELEASE_PV_LIST_LOCK(lockp) do { \ 200 struct rwlock **_lockp = (lockp); \ 201 \ 202 if (*_lockp != NULL) { \ 203 rw_wunlock(*_lockp); \ 204 *_lockp = NULL; \ 205 } \ 206 } while (0) 207 208 #define VM_PAGE_TO_PV_LIST_LOCK(m) \ 209 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 210 211 /* The list of all the user pmaps */ 212 LIST_HEAD(pmaplist, pmap); 213 static struct pmaplist allpmaps; 214 215 static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); 216 217 struct pmap kernel_pmap_store; 218 219 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 220 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 221 vm_offset_t kernel_vm_end = 0; 222 223 vm_paddr_t dmap_phys_base; /* The start of the dmap region */ 224 vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ 225 vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ 226 227 /* This code assumes all L1 DMAP entries will be used */ 228 CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS); 229 CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); 230 231 static struct rwlock_padalign pvh_global_lock; 232 233 /* 234 * Data for the pv entry allocation mechanism 235 */ 236 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 237 static struct mtx pv_chunks_mutex; 238 static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 239 240 static void free_pv_chunk(struct pv_chunk *pc); 241 static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 242 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 243 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 244 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 245 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 246 vm_offset_t va); 247 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 248 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 249 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 250 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 251 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 252 vm_page_t m, struct rwlock **lockp); 253 254 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 255 struct rwlock **lockp); 256 257 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 258 struct spglist *free); 259 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 260 261 /* 262 * These load the old table data and store the new value. 263 * They need to be atomic as the System MMU may write to the table at 264 * the same time as the CPU. 265 */ 266 #define pmap_load_store(table, entry) atomic_swap_64(table, entry) 267 #define pmap_set(table, mask) atomic_set_64(table, mask) 268 #define pmap_load_clear(table) atomic_swap_64(table, 0) 269 #define pmap_load(table) (*table) 270 271 /********************/ 272 /* Inline functions */ 273 /********************/ 274 275 static __inline void 276 pagecopy(void *s, void *d) 277 { 278 279 memcpy(d, s, PAGE_SIZE); 280 } 281 282 static __inline void 283 pagezero(void *p) 284 { 285 286 bzero(p, PAGE_SIZE); 287 } 288 289 #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) 290 #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) 291 #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) 292 293 #define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE) 294 295 static __inline pd_entry_t * 296 pmap_l1(pmap_t pmap, vm_offset_t va) 297 { 298 299 return (&pmap->pm_l1[pmap_l1_index(va)]); 300 } 301 302 static __inline pd_entry_t * 303 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) 304 { 305 vm_paddr_t phys; 306 pd_entry_t *l2; 307 308 phys = PTE_TO_PHYS(pmap_load(l1)); 309 l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); 310 311 return (&l2[pmap_l2_index(va)]); 312 } 313 314 static __inline pd_entry_t * 315 pmap_l2(pmap_t pmap, vm_offset_t va) 316 { 317 pd_entry_t *l1; 318 319 l1 = pmap_l1(pmap, va); 320 if (l1 == NULL) 321 return (NULL); 322 if ((pmap_load(l1) & PTE_V) == 0) 323 return (NULL); 324 if ((pmap_load(l1) & PTE_RX) != 0) 325 return (NULL); 326 327 return (pmap_l1_to_l2(l1, va)); 328 } 329 330 static __inline pt_entry_t * 331 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) 332 { 333 vm_paddr_t phys; 334 pt_entry_t *l3; 335 336 phys = PTE_TO_PHYS(pmap_load(l2)); 337 l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); 338 339 return (&l3[pmap_l3_index(va)]); 340 } 341 342 static __inline pt_entry_t * 343 pmap_l3(pmap_t pmap, vm_offset_t va) 344 { 345 pd_entry_t *l2; 346 347 l2 = pmap_l2(pmap, va); 348 if (l2 == NULL) 349 return (NULL); 350 if ((pmap_load(l2) & PTE_V) == 0) 351 return (NULL); 352 if ((pmap_load(l2) & PTE_RX) != 0) 353 return (NULL); 354 355 return (pmap_l2_to_l3(l2, va)); 356 } 357 358 359 static __inline int 360 pmap_is_write(pt_entry_t entry) 361 { 362 363 return (entry & PTE_W); 364 } 365 366 static __inline int 367 pmap_is_current(pmap_t pmap) 368 { 369 370 return ((pmap == pmap_kernel()) || 371 (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); 372 } 373 374 static __inline int 375 pmap_l3_valid(pt_entry_t l3) 376 { 377 378 return (l3 & PTE_V); 379 } 380 381 static __inline int 382 pmap_l3_valid_cacheable(pt_entry_t l3) 383 { 384 385 /* TODO */ 386 387 return (0); 388 } 389 390 #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) 391 392 /* Checks if the page is dirty. */ 393 static inline int 394 pmap_page_dirty(pt_entry_t pte) 395 { 396 397 return (pte & PTE_D); 398 } 399 400 static __inline void 401 pmap_resident_count_inc(pmap_t pmap, int count) 402 { 403 404 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 405 pmap->pm_stats.resident_count += count; 406 } 407 408 static __inline void 409 pmap_resident_count_dec(pmap_t pmap, int count) 410 { 411 412 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 413 KASSERT(pmap->pm_stats.resident_count >= count, 414 ("pmap %p resident count underflow %ld %d", pmap, 415 pmap->pm_stats.resident_count, count)); 416 pmap->pm_stats.resident_count -= count; 417 } 418 419 static void 420 pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index, 421 pt_entry_t entry) 422 { 423 struct pmap *user_pmap; 424 pd_entry_t *l1; 425 426 /* Distribute new kernel L1 entry to all the user pmaps */ 427 if (pmap != kernel_pmap) 428 return; 429 430 LIST_FOREACH(user_pmap, &allpmaps, pm_list) { 431 l1 = &user_pmap->pm_l1[l1index]; 432 if (entry) 433 pmap_load_store(l1, entry); 434 else 435 pmap_load_clear(l1); 436 } 437 } 438 439 static pt_entry_t * 440 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 441 u_int *l2_slot) 442 { 443 pt_entry_t *l2; 444 pd_entry_t *l1; 445 446 l1 = (pd_entry_t *)l1pt; 447 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 448 449 /* Check locore has used a table L1 map */ 450 KASSERT((l1[*l1_slot] & PTE_RX) == 0, 451 ("Invalid bootstrap L1 table")); 452 453 /* Find the address of the L2 table */ 454 l2 = (pt_entry_t *)init_pt_va; 455 *l2_slot = pmap_l2_index(va); 456 457 return (l2); 458 } 459 460 static vm_paddr_t 461 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 462 { 463 u_int l1_slot, l2_slot; 464 pt_entry_t *l2; 465 u_int ret; 466 467 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 468 469 /* Check locore has used L2 superpages */ 470 KASSERT((l2[l2_slot] & PTE_RX) != 0, 471 ("Invalid bootstrap L2 table")); 472 473 /* L2 is superpages */ 474 ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; 475 ret += (va & L2_OFFSET); 476 477 return (ret); 478 } 479 480 static void 481 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) 482 { 483 vm_offset_t va; 484 vm_paddr_t pa; 485 pd_entry_t *l1; 486 u_int l1_slot; 487 pt_entry_t entry; 488 pn_t pn; 489 490 pa = dmap_phys_base = min_pa & ~L1_OFFSET; 491 va = DMAP_MIN_ADDRESS; 492 l1 = (pd_entry_t *)kern_l1; 493 l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); 494 495 for (; va < DMAP_MAX_ADDRESS && pa < max_pa; 496 pa += L1_SIZE, va += L1_SIZE, l1_slot++) { 497 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 498 499 /* superpages */ 500 pn = (pa / PAGE_SIZE); 501 entry = (PTE_V | PTE_RWX); 502 entry |= (pn << PTE_PPN0_S); 503 pmap_load_store(&l1[l1_slot], entry); 504 } 505 506 /* Set the upper limit of the DMAP region */ 507 dmap_phys_max = pa; 508 dmap_max_addr = va; 509 510 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 511 cpu_tlb_flushID(); 512 } 513 514 static vm_offset_t 515 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 516 { 517 vm_offset_t l2pt, l3pt; 518 pt_entry_t entry; 519 pd_entry_t *l2; 520 vm_paddr_t pa; 521 u_int l2_slot; 522 pn_t pn; 523 524 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 525 526 l2 = pmap_l2(kernel_pmap, va); 527 l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); 528 l2pt = (vm_offset_t)l2; 529 l2_slot = pmap_l2_index(va); 530 l3pt = l3_start; 531 532 for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { 533 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); 534 535 pa = pmap_early_vtophys(l1pt, l3pt); 536 pn = (pa / PAGE_SIZE); 537 entry = (PTE_V); 538 entry |= (pn << PTE_PPN0_S); 539 pmap_load_store(&l2[l2_slot], entry); 540 l3pt += PAGE_SIZE; 541 } 542 543 544 /* Clean the L2 page table */ 545 memset((void *)l3_start, 0, l3pt - l3_start); 546 cpu_dcache_wb_range(l3_start, l3pt - l3_start); 547 548 cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); 549 550 return (l3pt); 551 } 552 553 /* 554 * Bootstrap the system enough to run with virtual memory. 555 */ 556 void 557 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) 558 { 559 u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; 560 uint64_t kern_delta; 561 pt_entry_t *l2; 562 vm_offset_t va, freemempos; 563 vm_offset_t dpcpu, msgbufpv; 564 vm_paddr_t pa, min_pa, max_pa; 565 int i; 566 567 kern_delta = KERNBASE - kernstart; 568 physmem = 0; 569 570 printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); 571 printf("%lx\n", l1pt); 572 printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); 573 574 /* Set this early so we can use the pagetable walking functions */ 575 kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; 576 PMAP_LOCK_INIT(kernel_pmap); 577 578 /* 579 * Initialize the global pv list lock. 580 */ 581 rw_init(&pvh_global_lock, "pmap pv global"); 582 583 LIST_INIT(&allpmaps); 584 585 /* Assume the address we were loaded to is a valid physical address */ 586 min_pa = max_pa = KERNBASE - kern_delta; 587 588 /* 589 * Find the minimum physical address. physmap is sorted, 590 * but may contain empty ranges. 591 */ 592 for (i = 0; i < (physmap_idx * 2); i += 2) { 593 if (physmap[i] == physmap[i + 1]) 594 continue; 595 if (physmap[i] <= min_pa) 596 min_pa = physmap[i]; 597 if (physmap[i + 1] > max_pa) 598 max_pa = physmap[i + 1]; 599 } 600 printf("physmap_idx %lx\n", physmap_idx); 601 printf("min_pa %lx\n", min_pa); 602 printf("max_pa %lx\n", max_pa); 603 604 /* Create a direct map region early so we can use it for pa -> va */ 605 pmap_bootstrap_dmap(l1pt, min_pa, max_pa); 606 607 va = KERNBASE; 608 pa = KERNBASE - kern_delta; 609 610 /* 611 * Start to initialize phys_avail by copying from physmap 612 * up to the physical address KERNBASE points at. 613 */ 614 map_slot = avail_slot = 0; 615 for (; map_slot < (physmap_idx * 2); map_slot += 2) { 616 if (physmap[map_slot] == physmap[map_slot + 1]) 617 continue; 618 619 if (physmap[map_slot] <= pa && 620 physmap[map_slot + 1] > pa) 621 break; 622 623 phys_avail[avail_slot] = physmap[map_slot]; 624 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 625 physmem += (phys_avail[avail_slot + 1] - 626 phys_avail[avail_slot]) >> PAGE_SHIFT; 627 avail_slot += 2; 628 } 629 630 /* Add the memory before the kernel */ 631 if (physmap[avail_slot] < pa) { 632 phys_avail[avail_slot] = physmap[map_slot]; 633 phys_avail[avail_slot + 1] = pa; 634 physmem += (phys_avail[avail_slot + 1] - 635 phys_avail[avail_slot]) >> PAGE_SHIFT; 636 avail_slot += 2; 637 } 638 used_map_slot = map_slot; 639 640 /* 641 * Read the page table to find out what is already mapped. 642 * This assumes we have mapped a block of memory from KERNBASE 643 * using a single L1 entry. 644 */ 645 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 646 647 /* Sanity check the index, KERNBASE should be the first VA */ 648 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 649 650 /* Find how many pages we have mapped */ 651 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 652 if ((l2[l2_slot] & PTE_V) == 0) 653 break; 654 655 /* Check locore used L2 superpages */ 656 KASSERT((l2[l2_slot] & PTE_RX) != 0, 657 ("Invalid bootstrap L2 table")); 658 659 va += L2_SIZE; 660 pa += L2_SIZE; 661 } 662 663 va = roundup2(va, L2_SIZE); 664 665 freemempos = KERNBASE + kernlen; 666 freemempos = roundup2(freemempos, PAGE_SIZE); 667 668 /* Create the l3 tables for the early devmap */ 669 freemempos = pmap_bootstrap_l3(l1pt, 670 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 671 672 cpu_tlb_flushID(); 673 674 #define alloc_pages(var, np) \ 675 (var) = freemempos; \ 676 freemempos += (np * PAGE_SIZE); \ 677 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 678 679 /* Allocate dynamic per-cpu area. */ 680 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 681 dpcpu_init((void *)dpcpu, 0); 682 683 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 684 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 685 msgbufp = (void *)msgbufpv; 686 687 virtual_avail = roundup2(freemempos, L2_SIZE); 688 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 689 kernel_vm_end = virtual_avail; 690 691 pa = pmap_early_vtophys(l1pt, freemempos); 692 693 /* Finish initialising physmap */ 694 map_slot = used_map_slot; 695 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 696 map_slot < (physmap_idx * 2); map_slot += 2) { 697 if (physmap[map_slot] == physmap[map_slot + 1]) { 698 continue; 699 } 700 701 /* Have we used the current range? */ 702 if (physmap[map_slot + 1] <= pa) { 703 continue; 704 } 705 706 /* Do we need to split the entry? */ 707 if (physmap[map_slot] < pa) { 708 phys_avail[avail_slot] = pa; 709 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 710 } else { 711 phys_avail[avail_slot] = physmap[map_slot]; 712 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 713 } 714 physmem += (phys_avail[avail_slot + 1] - 715 phys_avail[avail_slot]) >> PAGE_SHIFT; 716 717 avail_slot += 2; 718 } 719 phys_avail[avail_slot] = 0; 720 phys_avail[avail_slot + 1] = 0; 721 722 /* 723 * Maxmem isn't the "maximum memory", it's one larger than the 724 * highest page of the physical address space. It should be 725 * called something like "Maxphyspage". 726 */ 727 Maxmem = atop(phys_avail[avail_slot - 1]); 728 729 cpu_tlb_flushID(); 730 } 731 732 /* 733 * Initialize a vm_page's machine-dependent fields. 734 */ 735 void 736 pmap_page_init(vm_page_t m) 737 { 738 739 TAILQ_INIT(&m->md.pv_list); 740 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 741 } 742 743 /* 744 * Initialize the pmap module. 745 * Called by vm_init, to initialize any structures that the pmap 746 * system needs to map virtual memory. 747 */ 748 void 749 pmap_init(void) 750 { 751 int i; 752 753 /* 754 * Initialize the pv chunk list mutex. 755 */ 756 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 757 758 /* 759 * Initialize the pool of pv list locks. 760 */ 761 for (i = 0; i < NPV_LIST_LOCKS; i++) 762 rw_init(&pv_list_locks[i], "pmap pv list"); 763 } 764 765 /* 766 * Normal, non-SMP, invalidation functions. 767 * We inline these within pmap.c for speed. 768 */ 769 PMAP_INLINE void 770 pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 771 { 772 773 /* TODO */ 774 775 sched_pin(); 776 __asm __volatile("sfence.vma %0" :: "r" (va) : "memory"); 777 sched_unpin(); 778 } 779 780 PMAP_INLINE void 781 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 782 { 783 784 /* TODO */ 785 786 sched_pin(); 787 __asm __volatile("sfence.vma"); 788 sched_unpin(); 789 } 790 791 PMAP_INLINE void 792 pmap_invalidate_all(pmap_t pmap) 793 { 794 795 /* TODO */ 796 797 sched_pin(); 798 __asm __volatile("sfence.vma"); 799 sched_unpin(); 800 } 801 802 /* 803 * Routine: pmap_extract 804 * Function: 805 * Extract the physical page address associated 806 * with the given map/virtual_address pair. 807 */ 808 vm_paddr_t 809 pmap_extract(pmap_t pmap, vm_offset_t va) 810 { 811 pd_entry_t *l2p, l2; 812 pt_entry_t *l3p, l3; 813 vm_paddr_t pa; 814 815 pa = 0; 816 PMAP_LOCK(pmap); 817 /* 818 * Start with the l2 tabel. We are unable to allocate 819 * pages in the l1 table. 820 */ 821 l2p = pmap_l2(pmap, va); 822 if (l2p != NULL) { 823 l2 = pmap_load(l2p); 824 if ((l2 & PTE_RX) == 0) { 825 l3p = pmap_l2_to_l3(l2p, va); 826 if (l3p != NULL) { 827 l3 = pmap_load(l3p); 828 pa = PTE_TO_PHYS(l3); 829 pa |= (va & L3_OFFSET); 830 } 831 } else { 832 /* L2 is superpages */ 833 pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; 834 pa |= (va & L2_OFFSET); 835 } 836 } 837 PMAP_UNLOCK(pmap); 838 return (pa); 839 } 840 841 /* 842 * Routine: pmap_extract_and_hold 843 * Function: 844 * Atomically extract and hold the physical page 845 * with the given pmap and virtual address pair 846 * if that mapping permits the given protection. 847 */ 848 vm_page_t 849 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 850 { 851 pt_entry_t *l3p, l3; 852 vm_paddr_t phys; 853 vm_paddr_t pa; 854 vm_page_t m; 855 856 pa = 0; 857 m = NULL; 858 PMAP_LOCK(pmap); 859 retry: 860 l3p = pmap_l3(pmap, va); 861 if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { 862 if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) { 863 phys = PTE_TO_PHYS(l3); 864 if (vm_page_pa_tryrelock(pmap, phys, &pa)) 865 goto retry; 866 m = PHYS_TO_VM_PAGE(phys); 867 vm_page_hold(m); 868 } 869 } 870 PA_UNLOCK_COND(pa); 871 PMAP_UNLOCK(pmap); 872 return (m); 873 } 874 875 vm_paddr_t 876 pmap_kextract(vm_offset_t va) 877 { 878 pd_entry_t *l2; 879 pt_entry_t *l3; 880 vm_paddr_t pa; 881 882 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 883 pa = DMAP_TO_PHYS(va); 884 } else { 885 l2 = pmap_l2(kernel_pmap, va); 886 if (l2 == NULL) 887 panic("pmap_kextract: No l2"); 888 if ((pmap_load(l2) & PTE_RX) != 0) { 889 /* superpages */ 890 pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; 891 pa |= (va & L2_OFFSET); 892 return (pa); 893 } 894 895 l3 = pmap_l2_to_l3(l2, va); 896 if (l3 == NULL) 897 panic("pmap_kextract: No l3..."); 898 pa = PTE_TO_PHYS(pmap_load(l3)); 899 pa |= (va & PAGE_MASK); 900 } 901 return (pa); 902 } 903 904 /*************************************************** 905 * Low level mapping routines..... 906 ***************************************************/ 907 908 void 909 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 910 { 911 pt_entry_t entry; 912 pt_entry_t *l3; 913 vm_offset_t va; 914 pn_t pn; 915 916 KASSERT((pa & L3_OFFSET) == 0, 917 ("pmap_kenter_device: Invalid physical address")); 918 KASSERT((sva & L3_OFFSET) == 0, 919 ("pmap_kenter_device: Invalid virtual address")); 920 KASSERT((size & PAGE_MASK) == 0, 921 ("pmap_kenter_device: Mapping is not page-sized")); 922 923 va = sva; 924 while (size != 0) { 925 l3 = pmap_l3(kernel_pmap, va); 926 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 927 928 pn = (pa / PAGE_SIZE); 929 entry = (PTE_V | PTE_RWX); 930 entry |= (pn << PTE_PPN0_S); 931 pmap_load_store(l3, entry); 932 933 PTE_SYNC(l3); 934 935 va += PAGE_SIZE; 936 pa += PAGE_SIZE; 937 size -= PAGE_SIZE; 938 } 939 pmap_invalidate_range(kernel_pmap, sva, va); 940 } 941 942 /* 943 * Remove a page from the kernel pagetables. 944 * Note: not SMP coherent. 945 */ 946 PMAP_INLINE void 947 pmap_kremove(vm_offset_t va) 948 { 949 pt_entry_t *l3; 950 951 l3 = pmap_l3(kernel_pmap, va); 952 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 953 954 if (pmap_l3_valid_cacheable(pmap_load(l3))) 955 cpu_dcache_wb_range(va, L3_SIZE); 956 pmap_load_clear(l3); 957 PTE_SYNC(l3); 958 pmap_invalidate_page(kernel_pmap, va); 959 } 960 961 void 962 pmap_kremove_device(vm_offset_t sva, vm_size_t size) 963 { 964 pt_entry_t *l3; 965 vm_offset_t va; 966 967 KASSERT((sva & L3_OFFSET) == 0, 968 ("pmap_kremove_device: Invalid virtual address")); 969 KASSERT((size & PAGE_MASK) == 0, 970 ("pmap_kremove_device: Mapping is not page-sized")); 971 972 va = sva; 973 while (size != 0) { 974 l3 = pmap_l3(kernel_pmap, va); 975 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 976 pmap_load_clear(l3); 977 PTE_SYNC(l3); 978 979 va += PAGE_SIZE; 980 size -= PAGE_SIZE; 981 } 982 pmap_invalidate_range(kernel_pmap, sva, va); 983 } 984 985 /* 986 * Used to map a range of physical addresses into kernel 987 * virtual address space. 988 * 989 * The value passed in '*virt' is a suggested virtual address for 990 * the mapping. Architectures which can support a direct-mapped 991 * physical to virtual region can return the appropriate address 992 * within that region, leaving '*virt' unchanged. Other 993 * architectures should map the pages starting at '*virt' and 994 * update '*virt' with the first usable address after the mapped 995 * region. 996 */ 997 vm_offset_t 998 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 999 { 1000 1001 return PHYS_TO_DMAP(start); 1002 } 1003 1004 1005 /* 1006 * Add a list of wired pages to the kva 1007 * this routine is only used for temporary 1008 * kernel mappings that do not need to have 1009 * page modification or references recorded. 1010 * Note that old mappings are simply written 1011 * over. The page *must* be wired. 1012 * Note: SMP coherent. Uses a ranged shootdown IPI. 1013 */ 1014 void 1015 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1016 { 1017 pt_entry_t *l3, pa; 1018 vm_offset_t va; 1019 vm_page_t m; 1020 pt_entry_t entry; 1021 pn_t pn; 1022 int i; 1023 1024 va = sva; 1025 for (i = 0; i < count; i++) { 1026 m = ma[i]; 1027 pa = VM_PAGE_TO_PHYS(m); 1028 pn = (pa / PAGE_SIZE); 1029 l3 = pmap_l3(kernel_pmap, va); 1030 1031 entry = (PTE_V | PTE_RWX); 1032 entry |= (pn << PTE_PPN0_S); 1033 pmap_load_store(l3, entry); 1034 1035 PTE_SYNC(l3); 1036 va += L3_SIZE; 1037 } 1038 pmap_invalidate_range(kernel_pmap, sva, va); 1039 } 1040 1041 /* 1042 * This routine tears out page mappings from the 1043 * kernel -- it is meant only for temporary mappings. 1044 * Note: SMP coherent. Uses a ranged shootdown IPI. 1045 */ 1046 void 1047 pmap_qremove(vm_offset_t sva, int count) 1048 { 1049 pt_entry_t *l3; 1050 vm_offset_t va; 1051 1052 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1053 1054 va = sva; 1055 while (count-- > 0) { 1056 l3 = pmap_l3(kernel_pmap, va); 1057 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 1058 1059 if (pmap_l3_valid_cacheable(pmap_load(l3))) 1060 cpu_dcache_wb_range(va, L3_SIZE); 1061 pmap_load_clear(l3); 1062 PTE_SYNC(l3); 1063 1064 va += PAGE_SIZE; 1065 } 1066 pmap_invalidate_range(kernel_pmap, sva, va); 1067 } 1068 1069 /*************************************************** 1070 * Page table page management routines..... 1071 ***************************************************/ 1072 /* 1073 * Schedule the specified unused page table page to be freed. Specifically, 1074 * add the page to the specified list of pages that will be released to the 1075 * physical memory manager after the TLB has been updated. 1076 */ 1077 static __inline void 1078 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1079 boolean_t set_PG_ZERO) 1080 { 1081 1082 if (set_PG_ZERO) 1083 m->flags |= PG_ZERO; 1084 else 1085 m->flags &= ~PG_ZERO; 1086 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1087 } 1088 1089 /* 1090 * Decrements a page table page's wire count, which is used to record the 1091 * number of valid page table entries within the page. If the wire count 1092 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1093 * page table page was unmapped and FALSE otherwise. 1094 */ 1095 static inline boolean_t 1096 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1097 { 1098 1099 --m->wire_count; 1100 if (m->wire_count == 0) { 1101 _pmap_unwire_l3(pmap, va, m, free); 1102 return (TRUE); 1103 } else { 1104 return (FALSE); 1105 } 1106 } 1107 1108 static void 1109 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1110 { 1111 vm_paddr_t phys; 1112 1113 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1114 /* 1115 * unmap the page table page 1116 */ 1117 if (m->pindex >= NUPDE) { 1118 /* PD page */ 1119 pd_entry_t *l1; 1120 l1 = pmap_l1(pmap, va); 1121 pmap_load_clear(l1); 1122 pmap_distribute_l1(pmap, pmap_l1_index(va), 0); 1123 PTE_SYNC(l1); 1124 } else { 1125 /* PTE page */ 1126 pd_entry_t *l2; 1127 l2 = pmap_l2(pmap, va); 1128 pmap_load_clear(l2); 1129 PTE_SYNC(l2); 1130 } 1131 pmap_resident_count_dec(pmap, 1); 1132 if (m->pindex < NUPDE) { 1133 pd_entry_t *l1; 1134 /* We just released a PT, unhold the matching PD */ 1135 vm_page_t pdpg; 1136 1137 l1 = pmap_l1(pmap, va); 1138 phys = PTE_TO_PHYS(pmap_load(l1)); 1139 pdpg = PHYS_TO_VM_PAGE(phys); 1140 pmap_unwire_l3(pmap, va, pdpg, free); 1141 } 1142 pmap_invalidate_page(pmap, va); 1143 1144 vm_wire_sub(1); 1145 1146 /* 1147 * Put page on a list so that it is released after 1148 * *ALL* TLB shootdown is done 1149 */ 1150 pmap_add_delayed_free_list(m, free, TRUE); 1151 } 1152 1153 /* 1154 * After removing an l3 entry, this routine is used to 1155 * conditionally free the page, and manage the hold/wire counts. 1156 */ 1157 static int 1158 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1159 struct spglist *free) 1160 { 1161 vm_paddr_t phys; 1162 vm_page_t mpte; 1163 1164 if (va >= VM_MAXUSER_ADDRESS) 1165 return (0); 1166 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1167 1168 phys = PTE_TO_PHYS(ptepde); 1169 1170 mpte = PHYS_TO_VM_PAGE(phys); 1171 return (pmap_unwire_l3(pmap, va, mpte, free)); 1172 } 1173 1174 void 1175 pmap_pinit0(pmap_t pmap) 1176 { 1177 1178 PMAP_LOCK_INIT(pmap); 1179 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1180 pmap->pm_l1 = kernel_pmap->pm_l1; 1181 } 1182 1183 int 1184 pmap_pinit(pmap_t pmap) 1185 { 1186 vm_paddr_t l1phys; 1187 vm_page_t l1pt; 1188 1189 /* 1190 * allocate the l1 page 1191 */ 1192 while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | 1193 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1194 vm_wait(NULL); 1195 1196 l1phys = VM_PAGE_TO_PHYS(l1pt); 1197 pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); 1198 1199 if ((l1pt->flags & PG_ZERO) == 0) 1200 pagezero(pmap->pm_l1); 1201 1202 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1203 1204 /* Install kernel pagetables */ 1205 memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); 1206 1207 /* Add to the list of all user pmaps */ 1208 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1209 1210 return (1); 1211 } 1212 1213 /* 1214 * This routine is called if the desired page table page does not exist. 1215 * 1216 * If page table page allocation fails, this routine may sleep before 1217 * returning NULL. It sleeps only if a lock pointer was given. 1218 * 1219 * Note: If a page allocation fails at page table level two or three, 1220 * one or two pages may be held during the wait, only to be released 1221 * afterwards. This conservative approach is easily argued to avoid 1222 * race conditions. 1223 */ 1224 static vm_page_t 1225 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1226 { 1227 vm_page_t m, /*pdppg, */pdpg; 1228 pt_entry_t entry; 1229 vm_paddr_t phys; 1230 pn_t pn; 1231 1232 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1233 1234 /* 1235 * Allocate a page table page. 1236 */ 1237 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1238 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1239 if (lockp != NULL) { 1240 RELEASE_PV_LIST_LOCK(lockp); 1241 PMAP_UNLOCK(pmap); 1242 rw_runlock(&pvh_global_lock); 1243 vm_wait(NULL); 1244 rw_rlock(&pvh_global_lock); 1245 PMAP_LOCK(pmap); 1246 } 1247 1248 /* 1249 * Indicate the need to retry. While waiting, the page table 1250 * page may have been allocated. 1251 */ 1252 return (NULL); 1253 } 1254 1255 if ((m->flags & PG_ZERO) == 0) 1256 pmap_zero_page(m); 1257 1258 /* 1259 * Map the pagetable page into the process address space, if 1260 * it isn't already there. 1261 */ 1262 1263 if (ptepindex >= NUPDE) { 1264 pd_entry_t *l1; 1265 vm_pindex_t l1index; 1266 1267 l1index = ptepindex - NUPDE; 1268 l1 = &pmap->pm_l1[l1index]; 1269 1270 pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); 1271 entry = (PTE_V); 1272 entry |= (pn << PTE_PPN0_S); 1273 pmap_load_store(l1, entry); 1274 pmap_distribute_l1(pmap, l1index, entry); 1275 1276 PTE_SYNC(l1); 1277 1278 } else { 1279 vm_pindex_t l1index; 1280 pd_entry_t *l1, *l2; 1281 1282 l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); 1283 l1 = &pmap->pm_l1[l1index]; 1284 if (pmap_load(l1) == 0) { 1285 /* recurse for allocating page dir */ 1286 if (_pmap_alloc_l3(pmap, NUPDE + l1index, 1287 lockp) == NULL) { 1288 vm_page_unwire_noq(m); 1289 vm_page_free_zero(m); 1290 return (NULL); 1291 } 1292 } else { 1293 phys = PTE_TO_PHYS(pmap_load(l1)); 1294 pdpg = PHYS_TO_VM_PAGE(phys); 1295 pdpg->wire_count++; 1296 } 1297 1298 phys = PTE_TO_PHYS(pmap_load(l1)); 1299 l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); 1300 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1301 1302 pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); 1303 entry = (PTE_V); 1304 entry |= (pn << PTE_PPN0_S); 1305 pmap_load_store(l2, entry); 1306 1307 PTE_SYNC(l2); 1308 } 1309 1310 pmap_resident_count_inc(pmap, 1); 1311 1312 return (m); 1313 } 1314 1315 static vm_page_t 1316 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1317 { 1318 vm_pindex_t ptepindex; 1319 pd_entry_t *l2; 1320 vm_paddr_t phys; 1321 vm_page_t m; 1322 1323 /* 1324 * Calculate pagetable page index 1325 */ 1326 ptepindex = pmap_l2_pindex(va); 1327 retry: 1328 /* 1329 * Get the page directory entry 1330 */ 1331 l2 = pmap_l2(pmap, va); 1332 1333 /* 1334 * If the page table page is mapped, we just increment the 1335 * hold count, and activate it. 1336 */ 1337 if (l2 != NULL && pmap_load(l2) != 0) { 1338 phys = PTE_TO_PHYS(pmap_load(l2)); 1339 m = PHYS_TO_VM_PAGE(phys); 1340 m->wire_count++; 1341 } else { 1342 /* 1343 * Here if the pte page isn't mapped, or if it has been 1344 * deallocated. 1345 */ 1346 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1347 if (m == NULL && lockp != NULL) 1348 goto retry; 1349 } 1350 return (m); 1351 } 1352 1353 1354 /*************************************************** 1355 * Pmap allocation/deallocation routines. 1356 ***************************************************/ 1357 1358 /* 1359 * Release any resources held by the given physical map. 1360 * Called when a pmap initialized by pmap_pinit is being released. 1361 * Should only be called if the map contains no valid mappings. 1362 */ 1363 void 1364 pmap_release(pmap_t pmap) 1365 { 1366 vm_page_t m; 1367 1368 KASSERT(pmap->pm_stats.resident_count == 0, 1369 ("pmap_release: pmap resident count %ld != 0", 1370 pmap->pm_stats.resident_count)); 1371 1372 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); 1373 vm_page_unwire_noq(m); 1374 vm_page_free_zero(m); 1375 1376 /* Remove pmap from the allpmaps list */ 1377 LIST_REMOVE(pmap, pm_list); 1378 1379 /* Remove kernel pagetables */ 1380 bzero(pmap->pm_l1, PAGE_SIZE); 1381 } 1382 1383 #if 0 1384 static int 1385 kvm_size(SYSCTL_HANDLER_ARGS) 1386 { 1387 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1388 1389 return sysctl_handle_long(oidp, &ksize, 0, req); 1390 } 1391 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1392 0, 0, kvm_size, "LU", "Size of KVM"); 1393 1394 static int 1395 kvm_free(SYSCTL_HANDLER_ARGS) 1396 { 1397 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1398 1399 return sysctl_handle_long(oidp, &kfree, 0, req); 1400 } 1401 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1402 0, 0, kvm_free, "LU", "Amount of KVM free"); 1403 #endif /* 0 */ 1404 1405 /* 1406 * grow the number of kernel page table entries, if needed 1407 */ 1408 void 1409 pmap_growkernel(vm_offset_t addr) 1410 { 1411 vm_paddr_t paddr; 1412 vm_page_t nkpg; 1413 pd_entry_t *l1, *l2; 1414 pt_entry_t entry; 1415 pn_t pn; 1416 1417 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1418 1419 addr = roundup2(addr, L2_SIZE); 1420 if (addr - 1 >= kernel_map->max_offset) 1421 addr = kernel_map->max_offset; 1422 while (kernel_vm_end < addr) { 1423 l1 = pmap_l1(kernel_pmap, kernel_vm_end); 1424 if (pmap_load(l1) == 0) { 1425 /* We need a new PDP entry */ 1426 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1427 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1428 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1429 if (nkpg == NULL) 1430 panic("pmap_growkernel: no memory to grow kernel"); 1431 if ((nkpg->flags & PG_ZERO) == 0) 1432 pmap_zero_page(nkpg); 1433 paddr = VM_PAGE_TO_PHYS(nkpg); 1434 1435 pn = (paddr / PAGE_SIZE); 1436 entry = (PTE_V); 1437 entry |= (pn << PTE_PPN0_S); 1438 pmap_load_store(l1, entry); 1439 pmap_distribute_l1(kernel_pmap, 1440 pmap_l1_index(kernel_vm_end), entry); 1441 1442 PTE_SYNC(l1); 1443 continue; /* try again */ 1444 } 1445 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1446 if ((pmap_load(l2) & PTE_A) != 0) { 1447 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1448 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1449 kernel_vm_end = kernel_map->max_offset; 1450 break; 1451 } 1452 continue; 1453 } 1454 1455 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1456 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1457 VM_ALLOC_ZERO); 1458 if (nkpg == NULL) 1459 panic("pmap_growkernel: no memory to grow kernel"); 1460 if ((nkpg->flags & PG_ZERO) == 0) { 1461 pmap_zero_page(nkpg); 1462 } 1463 paddr = VM_PAGE_TO_PHYS(nkpg); 1464 1465 pn = (paddr / PAGE_SIZE); 1466 entry = (PTE_V); 1467 entry |= (pn << PTE_PPN0_S); 1468 pmap_load_store(l2, entry); 1469 1470 PTE_SYNC(l2); 1471 pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1472 1473 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1474 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1475 kernel_vm_end = kernel_map->max_offset; 1476 break; 1477 } 1478 } 1479 } 1480 1481 1482 /*************************************************** 1483 * page management routines. 1484 ***************************************************/ 1485 1486 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1487 CTASSERT(_NPCM == 3); 1488 CTASSERT(_NPCPV == 168); 1489 1490 static __inline struct pv_chunk * 1491 pv_to_chunk(pv_entry_t pv) 1492 { 1493 1494 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1495 } 1496 1497 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1498 1499 #define PC_FREE0 0xfffffffffffffffful 1500 #define PC_FREE1 0xfffffffffffffffful 1501 #define PC_FREE2 0x000000fffffffffful 1502 1503 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1504 1505 #if 0 1506 #ifdef PV_STATS 1507 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1508 1509 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1510 "Current number of pv entry chunks"); 1511 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1512 "Current number of pv entry chunks allocated"); 1513 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1514 "Current number of pv entry chunks frees"); 1515 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1516 "Number of times tried to get a chunk page but failed."); 1517 1518 static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1519 static int pv_entry_spare; 1520 1521 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1522 "Current number of pv entry frees"); 1523 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1524 "Current number of pv entry allocs"); 1525 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1526 "Current number of pv entries"); 1527 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1528 "Current number of spare pv entries"); 1529 #endif 1530 #endif /* 0 */ 1531 1532 /* 1533 * We are in a serious low memory condition. Resort to 1534 * drastic measures to free some pages so we can allocate 1535 * another pv entry chunk. 1536 * 1537 * Returns NULL if PV entries were reclaimed from the specified pmap. 1538 * 1539 * We do not, however, unmap 2mpages because subsequent accesses will 1540 * allocate per-page pv entries until repromotion occurs, thereby 1541 * exacerbating the shortage of free pv entries. 1542 */ 1543 static vm_page_t 1544 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1545 { 1546 1547 panic("RISCVTODO: reclaim_pv_chunk"); 1548 } 1549 1550 /* 1551 * free the pv_entry back to the free list 1552 */ 1553 static void 1554 free_pv_entry(pmap_t pmap, pv_entry_t pv) 1555 { 1556 struct pv_chunk *pc; 1557 int idx, field, bit; 1558 1559 rw_assert(&pvh_global_lock, RA_LOCKED); 1560 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1561 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1562 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1563 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1564 pc = pv_to_chunk(pv); 1565 idx = pv - &pc->pc_pventry[0]; 1566 field = idx / 64; 1567 bit = idx % 64; 1568 pc->pc_map[field] |= 1ul << bit; 1569 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1570 pc->pc_map[2] != PC_FREE2) { 1571 /* 98% of the time, pc is already at the head of the list. */ 1572 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1573 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1574 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1575 } 1576 return; 1577 } 1578 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1579 free_pv_chunk(pc); 1580 } 1581 1582 static void 1583 free_pv_chunk(struct pv_chunk *pc) 1584 { 1585 vm_page_t m; 1586 1587 mtx_lock(&pv_chunks_mutex); 1588 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1589 mtx_unlock(&pv_chunks_mutex); 1590 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1591 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1592 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1593 /* entire chunk is free, return it */ 1594 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1595 #if 0 /* TODO: For minidump */ 1596 dump_drop_page(m->phys_addr); 1597 #endif 1598 vm_page_unwire(m, PQ_NONE); 1599 vm_page_free(m); 1600 } 1601 1602 /* 1603 * Returns a new PV entry, allocating a new PV chunk from the system when 1604 * needed. If this PV chunk allocation fails and a PV list lock pointer was 1605 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1606 * returned. 1607 * 1608 * The given PV list lock may be released. 1609 */ 1610 static pv_entry_t 1611 get_pv_entry(pmap_t pmap, struct rwlock **lockp) 1612 { 1613 int bit, field; 1614 pv_entry_t pv; 1615 struct pv_chunk *pc; 1616 vm_page_t m; 1617 1618 rw_assert(&pvh_global_lock, RA_LOCKED); 1619 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1620 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1621 retry: 1622 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1623 if (pc != NULL) { 1624 for (field = 0; field < _NPCM; field++) { 1625 if (pc->pc_map[field]) { 1626 bit = ffsl(pc->pc_map[field]) - 1; 1627 break; 1628 } 1629 } 1630 if (field < _NPCM) { 1631 pv = &pc->pc_pventry[field * 64 + bit]; 1632 pc->pc_map[field] &= ~(1ul << bit); 1633 /* If this was the last item, move it to tail */ 1634 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1635 pc->pc_map[2] == 0) { 1636 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1637 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1638 pc_list); 1639 } 1640 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1641 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1642 return (pv); 1643 } 1644 } 1645 /* No free items, allocate another chunk */ 1646 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1647 VM_ALLOC_WIRED); 1648 if (m == NULL) { 1649 if (lockp == NULL) { 1650 PV_STAT(pc_chunk_tryfail++); 1651 return (NULL); 1652 } 1653 m = reclaim_pv_chunk(pmap, lockp); 1654 if (m == NULL) 1655 goto retry; 1656 } 1657 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1658 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1659 #if 0 /* TODO: This is for minidump */ 1660 dump_add_page(m->phys_addr); 1661 #endif 1662 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1663 pc->pc_pmap = pmap; 1664 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1665 pc->pc_map[1] = PC_FREE1; 1666 pc->pc_map[2] = PC_FREE2; 1667 mtx_lock(&pv_chunks_mutex); 1668 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1669 mtx_unlock(&pv_chunks_mutex); 1670 pv = &pc->pc_pventry[0]; 1671 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1672 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1673 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1674 return (pv); 1675 } 1676 1677 /* 1678 * First find and then remove the pv entry for the specified pmap and virtual 1679 * address from the specified pv list. Returns the pv entry if found and NULL 1680 * otherwise. This operation can be performed on pv lists for either 4KB or 1681 * 2MB page mappings. 1682 */ 1683 static __inline pv_entry_t 1684 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1685 { 1686 pv_entry_t pv; 1687 1688 rw_assert(&pvh_global_lock, RA_LOCKED); 1689 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1690 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1691 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1692 pvh->pv_gen++; 1693 break; 1694 } 1695 } 1696 return (pv); 1697 } 1698 1699 /* 1700 * First find and then destroy the pv entry for the specified pmap and virtual 1701 * address. This operation can be performed on pv lists for either 4KB or 2MB 1702 * page mappings. 1703 */ 1704 static void 1705 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1706 { 1707 pv_entry_t pv; 1708 1709 pv = pmap_pvh_remove(pvh, pmap, va); 1710 1711 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1712 free_pv_entry(pmap, pv); 1713 } 1714 1715 /* 1716 * Conditionally create the PV entry for a 4KB page mapping if the required 1717 * memory can be allocated without resorting to reclamation. 1718 */ 1719 static boolean_t 1720 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1721 struct rwlock **lockp) 1722 { 1723 pv_entry_t pv; 1724 1725 rw_assert(&pvh_global_lock, RA_LOCKED); 1726 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1727 /* Pass NULL instead of the lock pointer to disable reclamation. */ 1728 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 1729 pv->pv_va = va; 1730 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1731 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 1732 m->md.pv_gen++; 1733 return (TRUE); 1734 } else 1735 return (FALSE); 1736 } 1737 1738 /* 1739 * pmap_remove_l3: do the things to unmap a page in a process 1740 */ 1741 static int 1742 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 1743 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 1744 { 1745 pt_entry_t old_l3; 1746 vm_paddr_t phys; 1747 vm_page_t m; 1748 1749 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1750 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 1751 cpu_dcache_wb_range(va, L3_SIZE); 1752 old_l3 = pmap_load_clear(l3); 1753 PTE_SYNC(l3); 1754 pmap_invalidate_page(pmap, va); 1755 if (old_l3 & PTE_SW_WIRED) 1756 pmap->pm_stats.wired_count -= 1; 1757 pmap_resident_count_dec(pmap, 1); 1758 if (old_l3 & PTE_SW_MANAGED) { 1759 phys = PTE_TO_PHYS(old_l3); 1760 m = PHYS_TO_VM_PAGE(phys); 1761 if (pmap_page_dirty(old_l3)) 1762 vm_page_dirty(m); 1763 if (old_l3 & PTE_A) 1764 vm_page_aflag_set(m, PGA_REFERENCED); 1765 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1766 pmap_pvh_free(&m->md, pmap, va); 1767 } 1768 1769 return (pmap_unuse_l3(pmap, va, l2e, free)); 1770 } 1771 1772 /* 1773 * Remove the given range of addresses from the specified map. 1774 * 1775 * It is assumed that the start and end are properly 1776 * rounded to the page size. 1777 */ 1778 void 1779 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1780 { 1781 struct rwlock *lock; 1782 vm_offset_t va, va_next; 1783 pd_entry_t *l1, *l2; 1784 pt_entry_t l3_pte, *l3; 1785 struct spglist free; 1786 1787 /* 1788 * Perform an unsynchronized read. This is, however, safe. 1789 */ 1790 if (pmap->pm_stats.resident_count == 0) 1791 return; 1792 1793 SLIST_INIT(&free); 1794 1795 rw_rlock(&pvh_global_lock); 1796 PMAP_LOCK(pmap); 1797 1798 lock = NULL; 1799 for (; sva < eva; sva = va_next) { 1800 if (pmap->pm_stats.resident_count == 0) 1801 break; 1802 1803 l1 = pmap_l1(pmap, sva); 1804 if (pmap_load(l1) == 0) { 1805 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1806 if (va_next < sva) 1807 va_next = eva; 1808 continue; 1809 } 1810 1811 /* 1812 * Calculate index for next page table. 1813 */ 1814 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1815 if (va_next < sva) 1816 va_next = eva; 1817 1818 l2 = pmap_l1_to_l2(l1, sva); 1819 if (l2 == NULL) 1820 continue; 1821 1822 l3_pte = pmap_load(l2); 1823 1824 /* 1825 * Weed out invalid mappings. 1826 */ 1827 if (l3_pte == 0) 1828 continue; 1829 if ((pmap_load(l2) & PTE_RX) != 0) 1830 continue; 1831 1832 /* 1833 * Limit our scan to either the end of the va represented 1834 * by the current page table page, or to the end of the 1835 * range being removed. 1836 */ 1837 if (va_next > eva) 1838 va_next = eva; 1839 1840 va = va_next; 1841 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 1842 sva += L3_SIZE) { 1843 if (l3 == NULL) 1844 panic("l3 == NULL"); 1845 if (pmap_load(l3) == 0) { 1846 if (va != va_next) { 1847 pmap_invalidate_range(pmap, va, sva); 1848 va = va_next; 1849 } 1850 continue; 1851 } 1852 if (va == va_next) 1853 va = sva; 1854 if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, 1855 &lock)) { 1856 sva += L3_SIZE; 1857 break; 1858 } 1859 } 1860 if (va != va_next) 1861 pmap_invalidate_range(pmap, va, sva); 1862 } 1863 if (lock != NULL) 1864 rw_wunlock(lock); 1865 rw_runlock(&pvh_global_lock); 1866 PMAP_UNLOCK(pmap); 1867 vm_page_free_pages_toq(&free, false); 1868 } 1869 1870 /* 1871 * Routine: pmap_remove_all 1872 * Function: 1873 * Removes this physical page from 1874 * all physical maps in which it resides. 1875 * Reflects back modify bits to the pager. 1876 * 1877 * Notes: 1878 * Original versions of this routine were very 1879 * inefficient because they iteratively called 1880 * pmap_remove (slow...) 1881 */ 1882 1883 void 1884 pmap_remove_all(vm_page_t m) 1885 { 1886 pv_entry_t pv; 1887 pmap_t pmap; 1888 pt_entry_t *l3, tl3; 1889 pd_entry_t *l2, tl2; 1890 struct spglist free; 1891 1892 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1893 ("pmap_remove_all: page %p is not managed", m)); 1894 SLIST_INIT(&free); 1895 rw_wlock(&pvh_global_lock); 1896 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1897 pmap = PV_PMAP(pv); 1898 PMAP_LOCK(pmap); 1899 pmap_resident_count_dec(pmap, 1); 1900 l2 = pmap_l2(pmap, pv->pv_va); 1901 KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); 1902 tl2 = pmap_load(l2); 1903 1904 KASSERT((tl2 & PTE_RX) == 0, 1905 ("pmap_remove_all: found a table when expecting " 1906 "a block in %p's pv list", m)); 1907 1908 l3 = pmap_l2_to_l3(l2, pv->pv_va); 1909 if (pmap_is_current(pmap) && 1910 pmap_l3_valid_cacheable(pmap_load(l3))) 1911 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 1912 tl3 = pmap_load_clear(l3); 1913 PTE_SYNC(l3); 1914 pmap_invalidate_page(pmap, pv->pv_va); 1915 if (tl3 & PTE_SW_WIRED) 1916 pmap->pm_stats.wired_count--; 1917 if ((tl3 & PTE_A) != 0) 1918 vm_page_aflag_set(m, PGA_REFERENCED); 1919 1920 /* 1921 * Update the vm_page_t clean and reference bits. 1922 */ 1923 if (pmap_page_dirty(tl3)) 1924 vm_page_dirty(m); 1925 pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); 1926 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1927 m->md.pv_gen++; 1928 free_pv_entry(pmap, pv); 1929 PMAP_UNLOCK(pmap); 1930 } 1931 vm_page_aflag_clear(m, PGA_WRITEABLE); 1932 rw_wunlock(&pvh_global_lock); 1933 vm_page_free_pages_toq(&free, false); 1934 } 1935 1936 /* 1937 * Set the physical protection on the 1938 * specified range of this map as requested. 1939 */ 1940 void 1941 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1942 { 1943 vm_offset_t va, va_next; 1944 pd_entry_t *l1, *l2; 1945 pt_entry_t *l3p, l3; 1946 pt_entry_t entry; 1947 1948 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1949 pmap_remove(pmap, sva, eva); 1950 return; 1951 } 1952 1953 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 1954 return; 1955 1956 PMAP_LOCK(pmap); 1957 for (; sva < eva; sva = va_next) { 1958 1959 l1 = pmap_l1(pmap, sva); 1960 if (pmap_load(l1) == 0) { 1961 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1962 if (va_next < sva) 1963 va_next = eva; 1964 continue; 1965 } 1966 1967 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1968 if (va_next < sva) 1969 va_next = eva; 1970 1971 l2 = pmap_l1_to_l2(l1, sva); 1972 if (l2 == NULL) 1973 continue; 1974 if (pmap_load(l2) == 0) 1975 continue; 1976 if ((pmap_load(l2) & PTE_RX) != 0) 1977 continue; 1978 1979 if (va_next > eva) 1980 va_next = eva; 1981 1982 va = va_next; 1983 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 1984 sva += L3_SIZE) { 1985 l3 = pmap_load(l3p); 1986 if (pmap_l3_valid(l3)) { 1987 entry = pmap_load(l3p); 1988 entry &= ~(PTE_W); 1989 pmap_load_store(l3p, entry); 1990 PTE_SYNC(l3p); 1991 /* XXX: Use pmap_invalidate_range */ 1992 pmap_invalidate_page(pmap, sva); 1993 } 1994 } 1995 } 1996 PMAP_UNLOCK(pmap); 1997 } 1998 1999 /* 2000 * Insert the given physical page (p) at 2001 * the specified virtual address (v) in the 2002 * target physical map with the protection requested. 2003 * 2004 * If specified, the page will be wired down, meaning 2005 * that the related pte can not be reclaimed. 2006 * 2007 * NB: This is the only routine which MAY NOT lazy-evaluate 2008 * or lose information. That is, this routine must actually 2009 * insert this page into the given map NOW. 2010 */ 2011 int 2012 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2013 u_int flags, int8_t psind __unused) 2014 { 2015 struct rwlock *lock; 2016 pd_entry_t *l1, *l2; 2017 pt_entry_t new_l3, orig_l3; 2018 pt_entry_t *l3; 2019 pv_entry_t pv; 2020 vm_paddr_t opa, pa, l2_pa, l3_pa; 2021 vm_page_t mpte, om, l2_m, l3_m; 2022 boolean_t nosleep; 2023 pt_entry_t entry; 2024 pn_t l2_pn; 2025 pn_t l3_pn; 2026 pn_t pn; 2027 2028 va = trunc_page(va); 2029 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2030 VM_OBJECT_ASSERT_LOCKED(m->object); 2031 pa = VM_PAGE_TO_PHYS(m); 2032 pn = (pa / PAGE_SIZE); 2033 2034 new_l3 = PTE_V | PTE_R | PTE_X; 2035 if (prot & VM_PROT_WRITE) 2036 new_l3 |= PTE_W; 2037 if ((va >> 63) == 0) 2038 new_l3 |= PTE_U; 2039 2040 new_l3 |= (pn << PTE_PPN0_S); 2041 if ((flags & PMAP_ENTER_WIRED) != 0) 2042 new_l3 |= PTE_SW_WIRED; 2043 2044 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2045 2046 mpte = NULL; 2047 2048 lock = NULL; 2049 rw_rlock(&pvh_global_lock); 2050 PMAP_LOCK(pmap); 2051 2052 if (va < VM_MAXUSER_ADDRESS) { 2053 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2054 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); 2055 if (mpte == NULL && nosleep) { 2056 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2057 if (lock != NULL) 2058 rw_wunlock(lock); 2059 rw_runlock(&pvh_global_lock); 2060 PMAP_UNLOCK(pmap); 2061 return (KERN_RESOURCE_SHORTAGE); 2062 } 2063 l3 = pmap_l3(pmap, va); 2064 } else { 2065 l3 = pmap_l3(pmap, va); 2066 /* TODO: This is not optimal, but should mostly work */ 2067 if (l3 == NULL) { 2068 l2 = pmap_l2(pmap, va); 2069 if (l2 == NULL) { 2070 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2071 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2072 VM_ALLOC_ZERO); 2073 if (l2_m == NULL) 2074 panic("pmap_enter: l2 pte_m == NULL"); 2075 if ((l2_m->flags & PG_ZERO) == 0) 2076 pmap_zero_page(l2_m); 2077 2078 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2079 l2_pn = (l2_pa / PAGE_SIZE); 2080 2081 l1 = pmap_l1(pmap, va); 2082 entry = (PTE_V); 2083 entry |= (l2_pn << PTE_PPN0_S); 2084 pmap_load_store(l1, entry); 2085 pmap_distribute_l1(pmap, pmap_l1_index(va), entry); 2086 PTE_SYNC(l1); 2087 2088 l2 = pmap_l1_to_l2(l1, va); 2089 } 2090 2091 KASSERT(l2 != NULL, 2092 ("No l2 table after allocating one")); 2093 2094 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2095 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 2096 if (l3_m == NULL) 2097 panic("pmap_enter: l3 pte_m == NULL"); 2098 if ((l3_m->flags & PG_ZERO) == 0) 2099 pmap_zero_page(l3_m); 2100 2101 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2102 l3_pn = (l3_pa / PAGE_SIZE); 2103 entry = (PTE_V); 2104 entry |= (l3_pn << PTE_PPN0_S); 2105 pmap_load_store(l2, entry); 2106 PTE_SYNC(l2); 2107 l3 = pmap_l2_to_l3(l2, va); 2108 } 2109 pmap_invalidate_page(pmap, va); 2110 } 2111 2112 om = NULL; 2113 orig_l3 = pmap_load(l3); 2114 opa = PTE_TO_PHYS(orig_l3); 2115 2116 /* 2117 * Is the specified virtual address already mapped? 2118 */ 2119 if (pmap_l3_valid(orig_l3)) { 2120 /* 2121 * Wiring change, just update stats. We don't worry about 2122 * wiring PT pages as they remain resident as long as there 2123 * are valid mappings in them. Hence, if a user page is wired, 2124 * the PT page will be also. 2125 */ 2126 if ((flags & PMAP_ENTER_WIRED) != 0 && 2127 (orig_l3 & PTE_SW_WIRED) == 0) 2128 pmap->pm_stats.wired_count++; 2129 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2130 (orig_l3 & PTE_SW_WIRED) != 0) 2131 pmap->pm_stats.wired_count--; 2132 2133 /* 2134 * Remove the extra PT page reference. 2135 */ 2136 if (mpte != NULL) { 2137 mpte->wire_count--; 2138 KASSERT(mpte->wire_count > 0, 2139 ("pmap_enter: missing reference to page table page," 2140 " va: 0x%lx", va)); 2141 } 2142 2143 /* 2144 * Has the physical page changed? 2145 */ 2146 if (opa == pa) { 2147 /* 2148 * No, might be a protection or wiring change. 2149 */ 2150 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2151 new_l3 |= PTE_SW_MANAGED; 2152 if (pmap_is_write(new_l3)) 2153 vm_page_aflag_set(m, PGA_WRITEABLE); 2154 } 2155 goto validate; 2156 } 2157 2158 /* Flush the cache, there might be uncommitted data in it */ 2159 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2160 cpu_dcache_wb_range(va, L3_SIZE); 2161 } else { 2162 /* 2163 * Increment the counters. 2164 */ 2165 if ((new_l3 & PTE_SW_WIRED) != 0) 2166 pmap->pm_stats.wired_count++; 2167 pmap_resident_count_inc(pmap, 1); 2168 } 2169 /* 2170 * Enter on the PV list if part of our managed memory. 2171 */ 2172 if ((m->oflags & VPO_UNMANAGED) == 0) { 2173 new_l3 |= PTE_SW_MANAGED; 2174 pv = get_pv_entry(pmap, &lock); 2175 pv->pv_va = va; 2176 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2177 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2178 m->md.pv_gen++; 2179 if (pmap_is_write(new_l3)) 2180 vm_page_aflag_set(m, PGA_WRITEABLE); 2181 } 2182 2183 /* 2184 * Update the L3 entry. 2185 */ 2186 if (orig_l3 != 0) { 2187 validate: 2188 orig_l3 = pmap_load_store(l3, new_l3); 2189 PTE_SYNC(l3); 2190 opa = PTE_TO_PHYS(orig_l3); 2191 2192 if (opa != pa) { 2193 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2194 om = PHYS_TO_VM_PAGE(opa); 2195 if (pmap_page_dirty(orig_l3)) 2196 vm_page_dirty(om); 2197 if ((orig_l3 & PTE_A) != 0) 2198 vm_page_aflag_set(om, PGA_REFERENCED); 2199 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2200 pmap_pvh_free(&om->md, pmap, va); 2201 } 2202 } else if (pmap_page_dirty(orig_l3)) { 2203 if ((orig_l3 & PTE_SW_MANAGED) != 0) 2204 vm_page_dirty(m); 2205 } 2206 } else { 2207 pmap_load_store(l3, new_l3); 2208 PTE_SYNC(l3); 2209 } 2210 pmap_invalidate_page(pmap, va); 2211 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2212 cpu_icache_sync_range(va, PAGE_SIZE); 2213 2214 if (lock != NULL) 2215 rw_wunlock(lock); 2216 rw_runlock(&pvh_global_lock); 2217 PMAP_UNLOCK(pmap); 2218 return (KERN_SUCCESS); 2219 } 2220 2221 /* 2222 * Maps a sequence of resident pages belonging to the same object. 2223 * The sequence begins with the given page m_start. This page is 2224 * mapped at the given virtual address start. Each subsequent page is 2225 * mapped at a virtual address that is offset from start by the same 2226 * amount as the page is offset from m_start within the object. The 2227 * last page in the sequence is the page with the largest offset from 2228 * m_start that can be mapped at a virtual address less than the given 2229 * virtual address end. Not every virtual page between start and end 2230 * is mapped; only those for which a resident page exists with the 2231 * corresponding offset from m_start are mapped. 2232 */ 2233 void 2234 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2235 vm_page_t m_start, vm_prot_t prot) 2236 { 2237 struct rwlock *lock; 2238 vm_offset_t va; 2239 vm_page_t m, mpte; 2240 vm_pindex_t diff, psize; 2241 2242 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2243 2244 psize = atop(end - start); 2245 mpte = NULL; 2246 m = m_start; 2247 lock = NULL; 2248 rw_rlock(&pvh_global_lock); 2249 PMAP_LOCK(pmap); 2250 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2251 va = start + ptoa(diff); 2252 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2253 m = TAILQ_NEXT(m, listq); 2254 } 2255 if (lock != NULL) 2256 rw_wunlock(lock); 2257 rw_runlock(&pvh_global_lock); 2258 PMAP_UNLOCK(pmap); 2259 } 2260 2261 /* 2262 * this code makes some *MAJOR* assumptions: 2263 * 1. Current pmap & pmap exists. 2264 * 2. Not wired. 2265 * 3. Read access. 2266 * 4. No page table pages. 2267 * but is *MUCH* faster than pmap_enter... 2268 */ 2269 2270 void 2271 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2272 { 2273 struct rwlock *lock; 2274 2275 lock = NULL; 2276 rw_rlock(&pvh_global_lock); 2277 PMAP_LOCK(pmap); 2278 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2279 if (lock != NULL) 2280 rw_wunlock(lock); 2281 rw_runlock(&pvh_global_lock); 2282 PMAP_UNLOCK(pmap); 2283 } 2284 2285 static vm_page_t 2286 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2287 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2288 { 2289 struct spglist free; 2290 vm_paddr_t phys; 2291 pd_entry_t *l2; 2292 pt_entry_t *l3; 2293 vm_paddr_t pa; 2294 pt_entry_t entry; 2295 pn_t pn; 2296 2297 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2298 (m->oflags & VPO_UNMANAGED) != 0, 2299 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2300 rw_assert(&pvh_global_lock, RA_LOCKED); 2301 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2302 2303 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 2304 /* 2305 * In the case that a page table page is not 2306 * resident, we are creating it here. 2307 */ 2308 if (va < VM_MAXUSER_ADDRESS) { 2309 vm_pindex_t l2pindex; 2310 2311 /* 2312 * Calculate pagetable page index 2313 */ 2314 l2pindex = pmap_l2_pindex(va); 2315 if (mpte && (mpte->pindex == l2pindex)) { 2316 mpte->wire_count++; 2317 } else { 2318 /* 2319 * Get the l2 entry 2320 */ 2321 l2 = pmap_l2(pmap, va); 2322 2323 /* 2324 * If the page table page is mapped, we just increment 2325 * the hold count, and activate it. Otherwise, we 2326 * attempt to allocate a page table page. If this 2327 * attempt fails, we don't retry. Instead, we give up. 2328 */ 2329 if (l2 != NULL && pmap_load(l2) != 0) { 2330 phys = PTE_TO_PHYS(pmap_load(l2)); 2331 mpte = PHYS_TO_VM_PAGE(phys); 2332 mpte->wire_count++; 2333 } else { 2334 /* 2335 * Pass NULL instead of the PV list lock 2336 * pointer, because we don't intend to sleep. 2337 */ 2338 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2339 if (mpte == NULL) 2340 return (mpte); 2341 } 2342 } 2343 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2344 l3 = &l3[pmap_l3_index(va)]; 2345 } else { 2346 mpte = NULL; 2347 l3 = pmap_l3(kernel_pmap, va); 2348 } 2349 if (l3 == NULL) 2350 panic("pmap_enter_quick_locked: No l3"); 2351 if (pmap_load(l3) != 0) { 2352 if (mpte != NULL) { 2353 mpte->wire_count--; 2354 mpte = NULL; 2355 } 2356 return (mpte); 2357 } 2358 2359 /* 2360 * Enter on the PV list if part of our managed memory. 2361 */ 2362 if ((m->oflags & VPO_UNMANAGED) == 0 && 2363 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2364 if (mpte != NULL) { 2365 SLIST_INIT(&free); 2366 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2367 pmap_invalidate_page(pmap, va); 2368 vm_page_free_pages_toq(&free, false); 2369 } 2370 mpte = NULL; 2371 } 2372 return (mpte); 2373 } 2374 2375 /* 2376 * Increment counters 2377 */ 2378 pmap_resident_count_inc(pmap, 1); 2379 2380 pa = VM_PAGE_TO_PHYS(m); 2381 pn = (pa / PAGE_SIZE); 2382 2383 /* RISCVTODO: check permissions */ 2384 entry = (PTE_V | PTE_RWX); 2385 entry |= (pn << PTE_PPN0_S); 2386 2387 /* 2388 * Now validate mapping with RO protection 2389 */ 2390 if ((m->oflags & VPO_UNMANAGED) == 0) 2391 entry |= PTE_SW_MANAGED; 2392 pmap_load_store(l3, entry); 2393 2394 PTE_SYNC(l3); 2395 pmap_invalidate_page(pmap, va); 2396 return (mpte); 2397 } 2398 2399 /* 2400 * This code maps large physical mmap regions into the 2401 * processor address space. Note that some shortcuts 2402 * are taken, but the code works. 2403 */ 2404 void 2405 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2406 vm_pindex_t pindex, vm_size_t size) 2407 { 2408 2409 VM_OBJECT_ASSERT_WLOCKED(object); 2410 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2411 ("pmap_object_init_pt: non-device object")); 2412 } 2413 2414 /* 2415 * Clear the wired attribute from the mappings for the specified range of 2416 * addresses in the given pmap. Every valid mapping within that range 2417 * must have the wired attribute set. In contrast, invalid mappings 2418 * cannot have the wired attribute set, so they are ignored. 2419 * 2420 * The wired attribute of the page table entry is not a hardware feature, 2421 * so there is no need to invalidate any TLB entries. 2422 */ 2423 void 2424 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2425 { 2426 vm_offset_t va_next; 2427 pd_entry_t *l1, *l2; 2428 pt_entry_t *l3; 2429 boolean_t pv_lists_locked; 2430 2431 pv_lists_locked = FALSE; 2432 PMAP_LOCK(pmap); 2433 for (; sva < eva; sva = va_next) { 2434 l1 = pmap_l1(pmap, sva); 2435 if (pmap_load(l1) == 0) { 2436 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2437 if (va_next < sva) 2438 va_next = eva; 2439 continue; 2440 } 2441 2442 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2443 if (va_next < sva) 2444 va_next = eva; 2445 2446 l2 = pmap_l1_to_l2(l1, sva); 2447 if (pmap_load(l2) == 0) 2448 continue; 2449 2450 if (va_next > eva) 2451 va_next = eva; 2452 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2453 sva += L3_SIZE) { 2454 if (pmap_load(l3) == 0) 2455 continue; 2456 if ((pmap_load(l3) & PTE_SW_WIRED) == 0) 2457 panic("pmap_unwire: l3 %#jx is missing " 2458 "PTE_SW_WIRED", (uintmax_t)*l3); 2459 2460 /* 2461 * PG_W must be cleared atomically. Although the pmap 2462 * lock synchronizes access to PG_W, another processor 2463 * could be setting PG_M and/or PG_A concurrently. 2464 */ 2465 atomic_clear_long(l3, PTE_SW_WIRED); 2466 pmap->pm_stats.wired_count--; 2467 } 2468 } 2469 if (pv_lists_locked) 2470 rw_runlock(&pvh_global_lock); 2471 PMAP_UNLOCK(pmap); 2472 } 2473 2474 /* 2475 * Copy the range specified by src_addr/len 2476 * from the source map to the range dst_addr/len 2477 * in the destination map. 2478 * 2479 * This routine is only advisory and need not do anything. 2480 */ 2481 2482 void 2483 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2484 vm_offset_t src_addr) 2485 { 2486 2487 } 2488 2489 /* 2490 * pmap_zero_page zeros the specified hardware page by mapping 2491 * the page into KVM and using bzero to clear its contents. 2492 */ 2493 void 2494 pmap_zero_page(vm_page_t m) 2495 { 2496 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2497 2498 pagezero((void *)va); 2499 } 2500 2501 /* 2502 * pmap_zero_page_area zeros the specified hardware page by mapping 2503 * the page into KVM and using bzero to clear its contents. 2504 * 2505 * off and size may not cover an area beyond a single hardware page. 2506 */ 2507 void 2508 pmap_zero_page_area(vm_page_t m, int off, int size) 2509 { 2510 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2511 2512 if (off == 0 && size == PAGE_SIZE) 2513 pagezero((void *)va); 2514 else 2515 bzero((char *)va + off, size); 2516 } 2517 2518 /* 2519 * pmap_copy_page copies the specified (machine independent) 2520 * page by mapping the page into virtual memory and using 2521 * bcopy to copy the page, one machine dependent page at a 2522 * time. 2523 */ 2524 void 2525 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2526 { 2527 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2528 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2529 2530 pagecopy((void *)src, (void *)dst); 2531 } 2532 2533 int unmapped_buf_allowed = 1; 2534 2535 void 2536 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2537 vm_offset_t b_offset, int xfersize) 2538 { 2539 void *a_cp, *b_cp; 2540 vm_page_t m_a, m_b; 2541 vm_paddr_t p_a, p_b; 2542 vm_offset_t a_pg_offset, b_pg_offset; 2543 int cnt; 2544 2545 while (xfersize > 0) { 2546 a_pg_offset = a_offset & PAGE_MASK; 2547 m_a = ma[a_offset >> PAGE_SHIFT]; 2548 p_a = m_a->phys_addr; 2549 b_pg_offset = b_offset & PAGE_MASK; 2550 m_b = mb[b_offset >> PAGE_SHIFT]; 2551 p_b = m_b->phys_addr; 2552 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2553 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2554 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2555 panic("!DMAP a %lx", p_a); 2556 } else { 2557 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2558 } 2559 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2560 panic("!DMAP b %lx", p_b); 2561 } else { 2562 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2563 } 2564 bcopy(a_cp, b_cp, cnt); 2565 a_offset += cnt; 2566 b_offset += cnt; 2567 xfersize -= cnt; 2568 } 2569 } 2570 2571 vm_offset_t 2572 pmap_quick_enter_page(vm_page_t m) 2573 { 2574 2575 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2576 } 2577 2578 void 2579 pmap_quick_remove_page(vm_offset_t addr) 2580 { 2581 } 2582 2583 /* 2584 * Returns true if the pmap's pv is one of the first 2585 * 16 pvs linked to from this page. This count may 2586 * be changed upwards or downwards in the future; it 2587 * is only necessary that true be returned for a small 2588 * subset of pmaps for proper page aging. 2589 */ 2590 boolean_t 2591 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2592 { 2593 struct rwlock *lock; 2594 pv_entry_t pv; 2595 int loops = 0; 2596 boolean_t rv; 2597 2598 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2599 ("pmap_page_exists_quick: page %p is not managed", m)); 2600 rv = FALSE; 2601 rw_rlock(&pvh_global_lock); 2602 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2603 rw_rlock(lock); 2604 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2605 if (PV_PMAP(pv) == pmap) { 2606 rv = TRUE; 2607 break; 2608 } 2609 loops++; 2610 if (loops >= 16) 2611 break; 2612 } 2613 rw_runlock(lock); 2614 rw_runlock(&pvh_global_lock); 2615 return (rv); 2616 } 2617 2618 /* 2619 * pmap_page_wired_mappings: 2620 * 2621 * Return the number of managed mappings to the given physical page 2622 * that are wired. 2623 */ 2624 int 2625 pmap_page_wired_mappings(vm_page_t m) 2626 { 2627 struct rwlock *lock; 2628 pmap_t pmap; 2629 pt_entry_t *l3; 2630 pv_entry_t pv; 2631 int count, md_gen; 2632 2633 if ((m->oflags & VPO_UNMANAGED) != 0) 2634 return (0); 2635 rw_rlock(&pvh_global_lock); 2636 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2637 rw_rlock(lock); 2638 restart: 2639 count = 0; 2640 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2641 pmap = PV_PMAP(pv); 2642 if (!PMAP_TRYLOCK(pmap)) { 2643 md_gen = m->md.pv_gen; 2644 rw_runlock(lock); 2645 PMAP_LOCK(pmap); 2646 rw_rlock(lock); 2647 if (md_gen != m->md.pv_gen) { 2648 PMAP_UNLOCK(pmap); 2649 goto restart; 2650 } 2651 } 2652 l3 = pmap_l3(pmap, pv->pv_va); 2653 if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) 2654 count++; 2655 PMAP_UNLOCK(pmap); 2656 } 2657 rw_runlock(lock); 2658 rw_runlock(&pvh_global_lock); 2659 return (count); 2660 } 2661 2662 /* 2663 * Destroy all managed, non-wired mappings in the given user-space 2664 * pmap. This pmap cannot be active on any processor besides the 2665 * caller. 2666 * 2667 * This function cannot be applied to the kernel pmap. Moreover, it 2668 * is not intended for general use. It is only to be used during 2669 * process termination. Consequently, it can be implemented in ways 2670 * that make it faster than pmap_remove(). First, it can more quickly 2671 * destroy mappings by iterating over the pmap's collection of PV 2672 * entries, rather than searching the page table. Second, it doesn't 2673 * have to test and clear the page table entries atomically, because 2674 * no processor is currently accessing the user address space. In 2675 * particular, a page table entry's dirty bit won't change state once 2676 * this function starts. 2677 */ 2678 void 2679 pmap_remove_pages(pmap_t pmap) 2680 { 2681 pd_entry_t ptepde, *l2; 2682 pt_entry_t *l3, tl3; 2683 struct spglist free; 2684 vm_page_t m; 2685 pv_entry_t pv; 2686 struct pv_chunk *pc, *npc; 2687 struct rwlock *lock; 2688 int64_t bit; 2689 uint64_t inuse, bitmask; 2690 int allfree, field, freed, idx; 2691 vm_paddr_t pa; 2692 2693 lock = NULL; 2694 2695 SLIST_INIT(&free); 2696 rw_rlock(&pvh_global_lock); 2697 PMAP_LOCK(pmap); 2698 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2699 allfree = 1; 2700 freed = 0; 2701 for (field = 0; field < _NPCM; field++) { 2702 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2703 while (inuse != 0) { 2704 bit = ffsl(inuse) - 1; 2705 bitmask = 1UL << bit; 2706 idx = field * 64 + bit; 2707 pv = &pc->pc_pventry[idx]; 2708 inuse &= ~bitmask; 2709 2710 l2 = pmap_l2(pmap, pv->pv_va); 2711 ptepde = pmap_load(l2); 2712 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2713 tl3 = pmap_load(l3); 2714 2715 /* 2716 * We cannot remove wired pages from a process' mapping at this time 2717 */ 2718 if (tl3 & PTE_SW_WIRED) { 2719 allfree = 0; 2720 continue; 2721 } 2722 2723 pa = PTE_TO_PHYS(tl3); 2724 m = PHYS_TO_VM_PAGE(pa); 2725 KASSERT(m->phys_addr == pa, 2726 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2727 m, (uintmax_t)m->phys_addr, 2728 (uintmax_t)tl3)); 2729 2730 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2731 m < &vm_page_array[vm_page_array_size], 2732 ("pmap_remove_pages: bad l3 %#jx", 2733 (uintmax_t)tl3)); 2734 2735 if (pmap_is_current(pmap) && 2736 pmap_l3_valid_cacheable(pmap_load(l3))) 2737 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2738 pmap_load_clear(l3); 2739 PTE_SYNC(l3); 2740 pmap_invalidate_page(pmap, pv->pv_va); 2741 2742 /* 2743 * Update the vm_page_t clean/reference bits. 2744 */ 2745 if (pmap_page_dirty(tl3)) 2746 vm_page_dirty(m); 2747 2748 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2749 2750 /* Mark free */ 2751 pc->pc_map[field] |= bitmask; 2752 2753 pmap_resident_count_dec(pmap, 1); 2754 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2755 m->md.pv_gen++; 2756 2757 pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); 2758 freed++; 2759 } 2760 } 2761 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2762 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2763 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2764 if (allfree) { 2765 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2766 free_pv_chunk(pc); 2767 } 2768 } 2769 pmap_invalidate_all(pmap); 2770 if (lock != NULL) 2771 rw_wunlock(lock); 2772 rw_runlock(&pvh_global_lock); 2773 PMAP_UNLOCK(pmap); 2774 vm_page_free_pages_toq(&free, false); 2775 } 2776 2777 /* 2778 * This is used to check if a page has been accessed or modified. As we 2779 * don't have a bit to see if it has been modified we have to assume it 2780 * has been if the page is read/write. 2781 */ 2782 static boolean_t 2783 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 2784 { 2785 struct rwlock *lock; 2786 pv_entry_t pv; 2787 pt_entry_t *l3, mask, value; 2788 pmap_t pmap; 2789 int md_gen; 2790 boolean_t rv; 2791 2792 rv = FALSE; 2793 rw_rlock(&pvh_global_lock); 2794 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2795 rw_rlock(lock); 2796 restart: 2797 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2798 pmap = PV_PMAP(pv); 2799 if (!PMAP_TRYLOCK(pmap)) { 2800 md_gen = m->md.pv_gen; 2801 rw_runlock(lock); 2802 PMAP_LOCK(pmap); 2803 rw_rlock(lock); 2804 if (md_gen != m->md.pv_gen) { 2805 PMAP_UNLOCK(pmap); 2806 goto restart; 2807 } 2808 } 2809 l3 = pmap_l3(pmap, pv->pv_va); 2810 mask = 0; 2811 value = 0; 2812 if (modified) { 2813 mask |= PTE_D; 2814 value |= PTE_D; 2815 } 2816 if (accessed) { 2817 mask |= PTE_A; 2818 value |= PTE_A; 2819 } 2820 2821 #if 0 2822 if (modified) { 2823 mask |= ATTR_AP_RW_BIT; 2824 value |= ATTR_AP(ATTR_AP_RW); 2825 } 2826 if (accessed) { 2827 mask |= ATTR_AF | ATTR_DESCR_MASK; 2828 value |= ATTR_AF | L3_PAGE; 2829 } 2830 #endif 2831 2832 rv = (pmap_load(l3) & mask) == value; 2833 PMAP_UNLOCK(pmap); 2834 if (rv) 2835 goto out; 2836 } 2837 out: 2838 rw_runlock(lock); 2839 rw_runlock(&pvh_global_lock); 2840 return (rv); 2841 } 2842 2843 /* 2844 * pmap_is_modified: 2845 * 2846 * Return whether or not the specified physical page was modified 2847 * in any physical maps. 2848 */ 2849 boolean_t 2850 pmap_is_modified(vm_page_t m) 2851 { 2852 2853 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2854 ("pmap_is_modified: page %p is not managed", m)); 2855 2856 /* 2857 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2858 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2859 * is clear, no PTEs can have PG_M set. 2860 */ 2861 VM_OBJECT_ASSERT_WLOCKED(m->object); 2862 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2863 return (FALSE); 2864 return (pmap_page_test_mappings(m, FALSE, TRUE)); 2865 } 2866 2867 /* 2868 * pmap_is_prefaultable: 2869 * 2870 * Return whether or not the specified virtual address is eligible 2871 * for prefault. 2872 */ 2873 boolean_t 2874 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2875 { 2876 pt_entry_t *l3; 2877 boolean_t rv; 2878 2879 rv = FALSE; 2880 PMAP_LOCK(pmap); 2881 l3 = pmap_l3(pmap, addr); 2882 if (l3 != NULL && pmap_load(l3) != 0) { 2883 rv = TRUE; 2884 } 2885 PMAP_UNLOCK(pmap); 2886 return (rv); 2887 } 2888 2889 /* 2890 * pmap_is_referenced: 2891 * 2892 * Return whether or not the specified physical page was referenced 2893 * in any physical maps. 2894 */ 2895 boolean_t 2896 pmap_is_referenced(vm_page_t m) 2897 { 2898 2899 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2900 ("pmap_is_referenced: page %p is not managed", m)); 2901 return (pmap_page_test_mappings(m, TRUE, FALSE)); 2902 } 2903 2904 /* 2905 * Clear the write and modified bits in each of the given page's mappings. 2906 */ 2907 void 2908 pmap_remove_write(vm_page_t m) 2909 { 2910 pmap_t pmap; 2911 struct rwlock *lock; 2912 pv_entry_t pv; 2913 pt_entry_t *l3, oldl3; 2914 pt_entry_t newl3; 2915 int md_gen; 2916 2917 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2918 ("pmap_remove_write: page %p is not managed", m)); 2919 2920 /* 2921 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2922 * set by another thread while the object is locked. Thus, 2923 * if PGA_WRITEABLE is clear, no page table entries need updating. 2924 */ 2925 VM_OBJECT_ASSERT_WLOCKED(m->object); 2926 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2927 return; 2928 rw_rlock(&pvh_global_lock); 2929 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2930 retry_pv_loop: 2931 rw_wlock(lock); 2932 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2933 pmap = PV_PMAP(pv); 2934 if (!PMAP_TRYLOCK(pmap)) { 2935 md_gen = m->md.pv_gen; 2936 rw_wunlock(lock); 2937 PMAP_LOCK(pmap); 2938 rw_wlock(lock); 2939 if (md_gen != m->md.pv_gen) { 2940 PMAP_UNLOCK(pmap); 2941 rw_wunlock(lock); 2942 goto retry_pv_loop; 2943 } 2944 } 2945 l3 = pmap_l3(pmap, pv->pv_va); 2946 retry: 2947 oldl3 = pmap_load(l3); 2948 2949 if (pmap_is_write(oldl3)) { 2950 newl3 = oldl3 & ~(PTE_W); 2951 if (!atomic_cmpset_long(l3, oldl3, newl3)) 2952 goto retry; 2953 /* TODO: use pmap_page_dirty(oldl3) ? */ 2954 if ((oldl3 & PTE_A) != 0) 2955 vm_page_dirty(m); 2956 pmap_invalidate_page(pmap, pv->pv_va); 2957 } 2958 PMAP_UNLOCK(pmap); 2959 } 2960 rw_wunlock(lock); 2961 vm_page_aflag_clear(m, PGA_WRITEABLE); 2962 rw_runlock(&pvh_global_lock); 2963 } 2964 2965 static __inline boolean_t 2966 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 2967 { 2968 2969 return (FALSE); 2970 } 2971 2972 /* 2973 * pmap_ts_referenced: 2974 * 2975 * Return a count of reference bits for a page, clearing those bits. 2976 * It is not necessary for every reference bit to be cleared, but it 2977 * is necessary that 0 only be returned when there are truly no 2978 * reference bits set. 2979 * 2980 * As an optimization, update the page's dirty field if a modified bit is 2981 * found while counting reference bits. This opportunistic update can be 2982 * performed at low cost and can eliminate the need for some future calls 2983 * to pmap_is_modified(). However, since this function stops after 2984 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 2985 * dirty pages. Those dirty pages will only be detected by a future call 2986 * to pmap_is_modified(). 2987 */ 2988 int 2989 pmap_ts_referenced(vm_page_t m) 2990 { 2991 pv_entry_t pv, pvf; 2992 pmap_t pmap; 2993 struct rwlock *lock; 2994 pd_entry_t *l2; 2995 pt_entry_t *l3, old_l3; 2996 vm_paddr_t pa; 2997 int cleared, md_gen, not_cleared; 2998 struct spglist free; 2999 3000 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3001 ("pmap_ts_referenced: page %p is not managed", m)); 3002 SLIST_INIT(&free); 3003 cleared = 0; 3004 pa = VM_PAGE_TO_PHYS(m); 3005 lock = PHYS_TO_PV_LIST_LOCK(pa); 3006 rw_rlock(&pvh_global_lock); 3007 rw_wlock(lock); 3008 retry: 3009 not_cleared = 0; 3010 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 3011 goto out; 3012 pv = pvf; 3013 do { 3014 if (pvf == NULL) 3015 pvf = pv; 3016 pmap = PV_PMAP(pv); 3017 if (!PMAP_TRYLOCK(pmap)) { 3018 md_gen = m->md.pv_gen; 3019 rw_wunlock(lock); 3020 PMAP_LOCK(pmap); 3021 rw_wlock(lock); 3022 if (md_gen != m->md.pv_gen) { 3023 PMAP_UNLOCK(pmap); 3024 goto retry; 3025 } 3026 } 3027 l2 = pmap_l2(pmap, pv->pv_va); 3028 3029 KASSERT((pmap_load(l2) & PTE_RX) == 0, 3030 ("pmap_ts_referenced: found an invalid l2 table")); 3031 3032 l3 = pmap_l2_to_l3(l2, pv->pv_va); 3033 old_l3 = pmap_load(l3); 3034 if (pmap_page_dirty(old_l3)) 3035 vm_page_dirty(m); 3036 if ((old_l3 & PTE_A) != 0) { 3037 if (safe_to_clear_referenced(pmap, old_l3)) { 3038 /* 3039 * TODO: We don't handle the access flag 3040 * at all. We need to be able to set it in 3041 * the exception handler. 3042 */ 3043 panic("RISCVTODO: safe_to_clear_referenced\n"); 3044 } else if ((old_l3 & PTE_SW_WIRED) == 0) { 3045 /* 3046 * Wired pages cannot be paged out so 3047 * doing accessed bit emulation for 3048 * them is wasted effort. We do the 3049 * hard work for unwired pages only. 3050 */ 3051 pmap_remove_l3(pmap, l3, pv->pv_va, 3052 pmap_load(l2), &free, &lock); 3053 pmap_invalidate_page(pmap, pv->pv_va); 3054 cleared++; 3055 if (pvf == pv) 3056 pvf = NULL; 3057 pv = NULL; 3058 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 3059 ("inconsistent pv lock %p %p for page %p", 3060 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 3061 } else 3062 not_cleared++; 3063 } 3064 PMAP_UNLOCK(pmap); 3065 /* Rotate the PV list if it has more than one entry. */ 3066 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 3067 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 3068 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3069 m->md.pv_gen++; 3070 } 3071 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 3072 not_cleared < PMAP_TS_REFERENCED_MAX); 3073 out: 3074 rw_wunlock(lock); 3075 rw_runlock(&pvh_global_lock); 3076 vm_page_free_pages_toq(&free, false); 3077 return (cleared + not_cleared); 3078 } 3079 3080 /* 3081 * Apply the given advice to the specified range of addresses within the 3082 * given pmap. Depending on the advice, clear the referenced and/or 3083 * modified flags in each mapping and set the mapped page's dirty field. 3084 */ 3085 void 3086 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 3087 { 3088 } 3089 3090 /* 3091 * Clear the modify bits on the specified physical page. 3092 */ 3093 void 3094 pmap_clear_modify(vm_page_t m) 3095 { 3096 3097 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3098 ("pmap_clear_modify: page %p is not managed", m)); 3099 VM_OBJECT_ASSERT_WLOCKED(m->object); 3100 KASSERT(!vm_page_xbusied(m), 3101 ("pmap_clear_modify: page %p is exclusive busied", m)); 3102 3103 /* 3104 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 3105 * If the object containing the page is locked and the page is not 3106 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 3107 */ 3108 if ((m->aflags & PGA_WRITEABLE) == 0) 3109 return; 3110 3111 /* RISCVTODO: We lack support for tracking if a page is modified */ 3112 } 3113 3114 void * 3115 pmap_mapbios(vm_paddr_t pa, vm_size_t size) 3116 { 3117 3118 return ((void *)PHYS_TO_DMAP(pa)); 3119 } 3120 3121 void 3122 pmap_unmapbios(vm_paddr_t pa, vm_size_t size) 3123 { 3124 } 3125 3126 /* 3127 * Sets the memory attribute for the specified page. 3128 */ 3129 void 3130 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3131 { 3132 3133 m->md.pv_memattr = ma; 3134 3135 /* 3136 * RISCVTODO: Implement the below (from the amd64 pmap) 3137 * If "m" is a normal page, update its direct mapping. This update 3138 * can be relied upon to perform any cache operations that are 3139 * required for data coherence. 3140 */ 3141 if ((m->flags & PG_FICTITIOUS) == 0 && 3142 PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) 3143 panic("RISCVTODO: pmap_page_set_memattr"); 3144 } 3145 3146 /* 3147 * perform the pmap work for mincore 3148 */ 3149 int 3150 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3151 { 3152 3153 panic("RISCVTODO: pmap_mincore"); 3154 } 3155 3156 void 3157 pmap_activate(struct thread *td) 3158 { 3159 pmap_t pmap; 3160 uint64_t reg; 3161 3162 critical_enter(); 3163 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3164 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 3165 3166 reg = SATP_MODE_SV39; 3167 reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT); 3168 __asm __volatile("csrw sptbr, %0" :: "r"(reg)); 3169 3170 pmap_invalidate_all(pmap); 3171 critical_exit(); 3172 } 3173 3174 void 3175 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3176 { 3177 3178 panic("RISCVTODO: pmap_sync_icache"); 3179 } 3180 3181 /* 3182 * Increase the starting virtual address of the given mapping if a 3183 * different alignment might result in more superpage mappings. 3184 */ 3185 void 3186 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3187 vm_offset_t *addr, vm_size_t size) 3188 { 3189 } 3190 3191 /** 3192 * Get the kernel virtual address of a set of physical pages. If there are 3193 * physical addresses not covered by the DMAP perform a transient mapping 3194 * that will be removed when calling pmap_unmap_io_transient. 3195 * 3196 * \param page The pages the caller wishes to obtain the virtual 3197 * address on the kernel memory map. 3198 * \param vaddr On return contains the kernel virtual memory address 3199 * of the pages passed in the page parameter. 3200 * \param count Number of pages passed in. 3201 * \param can_fault TRUE if the thread using the mapped pages can take 3202 * page faults, FALSE otherwise. 3203 * 3204 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3205 * finished or FALSE otherwise. 3206 * 3207 */ 3208 boolean_t 3209 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3210 boolean_t can_fault) 3211 { 3212 vm_paddr_t paddr; 3213 boolean_t needs_mapping; 3214 int error, i; 3215 3216 /* 3217 * Allocate any KVA space that we need, this is done in a separate 3218 * loop to prevent calling vmem_alloc while pinned. 3219 */ 3220 needs_mapping = FALSE; 3221 for (i = 0; i < count; i++) { 3222 paddr = VM_PAGE_TO_PHYS(page[i]); 3223 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3224 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3225 M_BESTFIT | M_WAITOK, &vaddr[i]); 3226 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3227 needs_mapping = TRUE; 3228 } else { 3229 vaddr[i] = PHYS_TO_DMAP(paddr); 3230 } 3231 } 3232 3233 /* Exit early if everything is covered by the DMAP */ 3234 if (!needs_mapping) 3235 return (FALSE); 3236 3237 if (!can_fault) 3238 sched_pin(); 3239 for (i = 0; i < count; i++) { 3240 paddr = VM_PAGE_TO_PHYS(page[i]); 3241 if (paddr >= DMAP_MAX_PHYSADDR) { 3242 panic( 3243 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3244 } 3245 } 3246 3247 return (needs_mapping); 3248 } 3249 3250 void 3251 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3252 boolean_t can_fault) 3253 { 3254 vm_paddr_t paddr; 3255 int i; 3256 3257 if (!can_fault) 3258 sched_unpin(); 3259 for (i = 0; i < count; i++) { 3260 paddr = VM_PAGE_TO_PHYS(page[i]); 3261 if (paddr >= DMAP_MAX_PHYSADDR) { 3262 panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); 3263 } 3264 } 3265 } 3266