/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2003, by Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_kstack_pages.h"
#include "opt_sched.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/cpufunc.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <machine/cpu.h>
#include <x86/init.h>

#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>

#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

#define GiB(v)			(v ## ULL << 30)

#define	AP_BOOTPT_SZ		(PAGE_SIZE * 3)
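
/*
 * The warm-boot constants above name the BIOS Data Area reset vector at
 * 0x40:0x67 and the CMOS shutdown status byte; writing BIOS_WARM there
 * makes a CPU coming out of reset resume through that vector without a
 * memory test.  AP_BOOTPT_SZ covers the three page table pages (one
 * PML4, one PDP and one PD) that native_start_all_aps() fills in below
 * to give the AP trampoline an identity map of the low 1GB.
 */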

/* Temporary variables for init_secondary()  */
char *doublefault_stack;
char *mce_stack;
char *nmi_stack;
char *dbg_stack;

/*
 * Local data and functions.
 */

static int	start_ap(int apic_id);

static bool
is_kernel_paddr(vm_paddr_t pa)
{

	return (pa >= trunc_2mpage(btext - KERNBASE) &&
	    pa < round_page(_end - KERNBASE));
}

static bool
is_mpboot_good(vm_paddr_t start, vm_paddr_t end)
{

	return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem);
}

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
void
mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
{
	vm_paddr_t start, end;
	unsigned int i;
	bool allocated;

	alloc_ap_trampoline(physmap, physmap_idx);

	/*
	 * Find a memory region big enough below the 4GB boundary to
	 * store the initial page tables.  Region must be mapped by
	 * the direct map.
	 *
	 * Note that it needs to be aligned to a page boundary.
	 */
	allocated = false;
	/*
	 * The loop walks the physmap from the top down; i is unsigned,
	 * so it terminates once the index wraps past zero.
	 */
	for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
		/*
		 * First, try to chomp at the start of the physmap region.
		 * Kernel binary might claim it already.
		 */
		start = round_page(physmap[i]);
		end = start + AP_BOOTPT_SZ;
		if (start < end && end <= physmap[i + 1] &&
		    is_mpboot_good(start, end) &&
		    !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
			allocated = true;
			physmap[i] = end;
			break;
		}

		/*
		 * Second, try to chomp at the end.  Again, check
		 * against kernel.
		 */
		end = trunc_page(physmap[i + 1]);
		start = end - AP_BOOTPT_SZ;
		if (start < end && start >= physmap[i] &&
		    is_mpboot_good(start, end) &&
		    !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
			allocated = true;
			physmap[i + 1] = start;
			break;
		}
	}
	if (allocated) {
		mptramp_pagetables = start;
		if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
			memmove(&physmap[i], &physmap[i + 2],
			    sizeof(*physmap) * (*physmap_idx - i + 2));
			*physmap_idx -= 2;
		}
	} else {
		mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ;
		if (bootverbose)
			printf(
"Cannot find enough space for the initial AP page tables, placing them at %#x",
			    mptramp_pagetables);
	}
}

/*
 * Initialize the IPI handlers and start up the AP's.
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
	}

	/* Install an inter-CPU IPI for TLB invalidation */
	if (pmap_pcid_enabled) {
		if (invpcid_works) {
			setidt(IPI_INVLTLB, pti ?
			    IDTVEC(invltlb_invpcid_pti_pti) :
			    IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
			    SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
			    IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
			    IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
		} else {
			setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
			    IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
			    IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
			    IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
		}
	} else {
		setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
		    SDT_SYSIGT, SEL_KPL, 0);
	}
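
	/*
	 * Every IPI entry point installed here and below comes in a plain
	 * and a "_pti" flavor; when page-table isolation is in use the
	 * _pti variant is installed so the interrupt enters through the
	 * PTI trampoline before switching to the kernel page tables.
	 */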

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
	    IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
	    IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU suspend/resume */
	setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));

	/* Probe logical/physical core configuration. */
	topo_probe();

	assign_cpu_ids();

	/* Start each Application Processor */
	init_ops.start_all_aps();

	set_interrupt_apic_ids();
}

/*
 * AP CPU's call this to initialize themselves.
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	struct nmi_pcpu *np;
	u_int64_t cr0;
	int cpu, gsel_tss, x;
	struct region_descriptor ap_gdt;

	/* Set by the startup code for us to use */
	cpu = bootAP;

	/* Update microcode before doing anything else. */
	ucode_load_ap(cpu);

	/* Init tss */
	common_tss[cpu] = common_tss[0];
	common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;
	common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];

	/* The NMI stack runs on IST2. */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist2 = (long) np;

	/* The MC# stack runs on IST3. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist3 = (long) np;

	/* The DB# stack runs on IST4. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist4 = (long) np;
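
	/*
	 * Each IST stack set up above is a single page; for the NMI, MC#
	 * and DB# stacks a struct nmi_pcpu is reserved at the top of the
	 * page so the handler can find the per-CPU data, and those
	 * pointers are filled in further down in this function.
	 */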

	/* Prepare private GDT */
	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
			ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
	}
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
	ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
	lgdt(&ap_gdt);			/* does magic intra-segment return */

	/* Get per-cpu data */
	pc = &__pcpu[cpu];

	/* prime data page for it to use */
	pcpu_init(pc, cpu, sizeof(struct pcpu));
	dpcpu_init(dpcpu, cpu);
	pc->pc_apic_id = cpu_apic_ids[cpu];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;
	pc->pc_tssp = &common_tss[cpu];
	pc->pc_commontssp = &common_tss[cpu];
	pc->pc_rsp0 = 0;
	pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GPROC0_SEL];
	pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
	pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
	pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GUSERLDT_SEL];
	/* See comment in pmap_bootstrap(). */
	pc->pc_pcid_next = PMAP_PCID_KERN + 2;
	pc->pc_pcid_gen = 1;
	common_tss[cpu].tss_rsp0 = 0;

	/* Save the per-cpu pointer for use by the NMI handler. */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the MC# handler. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the DB# handler. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
	fix_cpuid();

	lidt(&r_idt);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	amd64_conf_fast_syscall();

	/* signal our startup to the BSP. */
	mp_naps++;

	/* Spin until the BSP releases the AP's. */
	while (atomic_load_acq_int(&aps_ready) == 0)
		ia32_pause();

	init_secondary_tail();
}

/*******************************************************************
 * local functions and data
 */

#ifdef NUMA
static void
mp_realloc_pcpu(int cpuid, int domain)
{
	vm_page_t m;
	vm_offset_t oa, na;

	oa = (vm_offset_t)&__pcpu[cpuid];
	if (_vm_phys_domain(pmap_kextract(oa)) == domain)
		return;
	m = vm_page_alloc_domain(NULL, 0, domain,
	    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
	if (m == NULL)
		return;
	na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	pagecopy((void *)oa, (void *)na);
	pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
	/* XXX old pcpu page leaked. */
}
#endif
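
/*
 * mp_realloc_pcpu() above remaps the existing __pcpu slot onto a page
 * allocated from the AP's own NUMA domain, so the address of the pcpu
 * area stays the same and only its backing page changes; the old page
 * is simply leaked, as the XXX note says.
 */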

/*
 * start each AP in our list
 */
int
native_start_all_aps(void)
{
	u_int64_t *pt4, *pt3, *pt2;
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, domain, i;
	u_char mpbiosreason;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* copy the AP 1st level boot code */
	bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);

	/* Locate the page tables, they'll be below the trampoline */
	pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
	pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
	pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);

	/* Create the initial 1GB replicated page tables */
	for (i = 0; i < 512; i++) {
		/* Each slot of the level 4 pages points to the same level 3 page */
		pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
		pt4[i] |= PG_V | PG_RW | PG_U;

		/* Each slot of the level 3 pages points to the same level 2 page */
		pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
		pt3[i] |= PG_V | PG_RW | PG_U;

		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
		pt2[i] = i * (2 * 1024 * 1024);
		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
	}

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);

	/* setup a vector to our boot code */
	*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
	*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */

	/* Relocate pcpu areas to the correct domain. */
#ifdef NUMA
	if (vm_ndomains > 1)
		for (cpu = 1; cpu < mp_ncpus; cpu++) {
			apic_id = cpu_apic_ids[cpu];
			domain = acpi_pxm_get_cpu_locality(apic_id);
			mp_realloc_pcpu(cpu, domain);
		}
#endif

	/* start each AP */
	domain = 0;
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];
#ifdef NUMA
		if (vm_ndomains > 1)
			domain = acpi_pxm_get_cpu_locality(apic_id);
#endif
		/* allocate and set up an idle stack data page */
		bootstacks[cpu] = (void *)kmem_malloc(kstack_pages * PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		doublefault_stack = (char *)kmem_malloc(PAGE_SIZE, M_WAITOK |
		    M_ZERO);
		mce_stack = (char *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO);
		nmi_stack = (char *)kmem_malloc_domainset(
		    DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO);
		dbg_stack = (char *)kmem_malloc_domainset(
		    DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO);
		dpcpu = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
		    DPCPU_SIZE, M_WAITOK | M_ZERO);

		bootSTK = (char *)bootstacks[cpu] +
		    kstack_pages * PAGE_SIZE - 8;
		bootAP = cpu;

		/* attempt to start the Application Processor */
		if (!start_ap(apic_id)) {
			/* restore the warmstart vector */
			*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
			panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
		}

		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
	}

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);

	/* number of APs actually started */
	return (mp_naps);
}
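
/*
 * The vector handed to ipi_startup() below is the physical page number
 * of the trampoline: a startup IPI makes the target CPU begin executing
 * in real mode at (vector << 12), which is why boot_address has to be
 * page aligned and located below 1MB.
 */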

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	ipi_startup(apic_id, vector);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

/*
 * The TLB shootdown handlers below all follow the same pattern: latch
 * smp_tlb_generation before doing the requested invalidation and only
 * then publish it through the per-CPU smp_tlb_done, which the CPU that
 * initiated the shootdown waits on.
 */

void
invltlb_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
	    INVPCID_CTX);
	PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_invpcid_pti_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	if (smp_tlb_pmap == kernel_pmap) {
		/*
		 * This invalidation actually needs to clear kernel
		 * mappings from the TLB in the current pmap, but
		 * since we were asked for the flush in the kernel
		 * pmap, achieve it by performing global flush.
		 */
		invpcid(&d, INVPCID_CTXGLOB);
	} else {
		invpcid(&d, INVPCID_CTX);
		d.pcid |= PMAP_PCID_USER_PT;
		invpcid(&d, INVPCID_CTX);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation, pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();
	} else {
		/*
		 * The current pmap might not be equal to
		 * smp_tlb_pmap.  The clearing of the pm_gen in
		 * pmap_invalidate_all() takes care of TLB
		 * invalidation when switching to the pmap on this
		 * CPU.
		 */
		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
			pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
			kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
			ucr3 = smp_tlb_pmap->pm_ucr3;
			if (ucr3 != PMAP_NO_CR3) {
				ucr3 |= PMAP_PCID_USER_PT | pcid;
				pmap_pti_pcid_invalidate(ucr3, kcr3);
			} else
				load_cr3(kcr3);
		}
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		invpcid(&d, INVPCID_ADDR);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_invpcid_handler(void)
{
	struct invpcid_descr d;
	vm_offset_t addr, addr2;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		do {
			invpcid(&d, INVPCID_ADDR);
			d.addr += PAGE_SIZE;
		} while (d.addr < addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_pcid_handler(void)
{
	vm_offset_t addr, addr2;
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}