1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
34 */ 35 36 #include <sys/cdefs.h> 37 #include "opt_atpic.h" 38 #include "opt_hwpmc_hooks.h" 39 40 #include "opt_ddb.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/asan.h> 45 #include <sys/bus.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/msan.h> 50 #include <sys/mutex.h> 51 #include <sys/pcpu.h> 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/timeet.h> 57 #include <sys/timetc.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <x86/apicreg.h> 63 #include <machine/clock.h> 64 #include <machine/cpufunc.h> 65 #include <machine/cputypes.h> 66 #include <machine/fpu.h> 67 #include <machine/frame.h> 68 #include <machine/intr_machdep.h> 69 #include <x86/apicvar.h> 70 #include <x86/mca.h> 71 #include <machine/md_var.h> 72 #include <machine/smp.h> 73 #include <machine/specialreg.h> 74 #include <x86/init.h> 75 76 #ifdef DDB 77 #include <sys/interrupt.h> 78 #include <ddb/ddb.h> 79 #endif 80 81 #ifdef __amd64__ 82 #define SDT_APIC SDT_SYSIGT 83 #define GSEL_APIC 0 84 #else 85 #define SDT_APIC SDT_SYS386IGT 86 #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 87 #endif 88 89 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); 90 91 /* Sanity checks on IDT vectors. */ 92 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 93 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 94 CTASSERT(APIC_LOCAL_INTS == 240); 95 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 96 97 /* 98 * I/O interrupts use non-negative IRQ values. These values are used 99 * to mark unused IDT entries or IDT entries reserved for a non-I/O 100 * interrupt. 
 */
/* Sentinel values stored in la_ioint_irqs[] for slots that are not I/O IRQs. */
#define	IRQ_FREE	-1	/* IDT vector is unallocated */
#define	IRQ_TIMER	-2	/* reserved for the local APIC timer */
#define	IRQ_SYSCALL	-3	/* reserved for the syscall gate */
#define	IRQ_DTRACE_RET	-4	/* reserved for the DTrace return probe */
#define	IRQ_EVTCHN	-5	/* reserved for the Xen event channel upcall */

/* Operating mode of the per-CPU LAPIC timer (see lapic_et_start()). */
enum lat_timer_mode {
	LAT_MODE_UNDEF =	0,	/* timer not programmed */
	LAT_MODE_PERIODIC =	1,	/* auto-reloading count */
	LAT_MODE_ONESHOT =	2,	/* single count-down */
	LAT_MODE_DEADLINE =	3,	/* TSC-deadline MSR mode */
};

/*
 * Support for local APICs.  Local APICs manage interrupts on each
 * individual processor as opposed to I/O APICs which receive interrupts
 * from I/O devices and then forward them on to the local APICs.
 *
 * Local APICs can also send interrupts to each other thus providing the
 * mechanism for IPIs.
 */

/*
 * Software view of a single LVT entry.  lvt_mode_impl() translates this
 * into the hardware register encoding.
 */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 1 = edge, 0 = level trigger */
	u_int lvt_activehi:1;		/* 1 = active high polarity */
	u_int lvt_masked:1;		/* 1 = entry masked */
	u_int lvt_active:1;		/* per-APIC override of the default */
	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode */
	u_int lvt_vector:8;		/* vector for fixed delivery mode */
};

/* Per-CPU local APIC state, indexed by APIC ID in the lapics[] array. */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];	/* LVT overrides */
	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* AMD extended LVT overrides */
	u_int la_id:8;			/* this CPU's APIC ID */
	u_int la_cluster:4;		/* logical destination cluster */
	u_int la_cluster_id:2;		/* intra-cluster ID */
	u_int la_present:1;		/* entry is populated */
	u_long *la_timer_count;		/* intrcnt slot for the timer */
	uint64_t la_timer_period;	/* last programmed period/count */
	enum lat_timer_mode la_timer_mode;
	uint32_t lvt_timer_base;	/* timer LVT sans mode bits */
	uint32_t lvt_timer_last;	/* last value written to timer LVT */
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;

/* Global defaults for local APIC LVT entries. */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};

/* Global defaults for AMD local APIC ELVT entries. */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/* ISR entry points for I/O interrupts, one handler per 32-vector bank. */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};

/* PTI (page-table isolation) variants of the bank handlers. */
static inthand_t *ioint_pti_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
};

/* Divide Configuration Register encodings, indexed by log2(divisor) - 1. */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;	/* MMIO mapping of the LAPIC page (xAPIC) */
vm_paddr_t lapic_paddr = DEFAULT_APIC_BASE;
int x2apic_mode;		/* nonzero when using x2APIC MSR access */
int lapic_eoi_suppression;
static int lapic_timer_tsc_deadline;
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
static uint64_t lapic_ipi_wait_mult;	/* busy-loop iterations per 1 us */
static int __read_mostly lapic_ds_idle_timeout = 1000000;
#endif
unsigned int max_apic_id;

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");
#ifdef SMP
SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN,
    &lapic_ds_idle_timeout, 0,
    "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)");
#endif

static void lapic_calibrate_initcount(struct lapic *la);

/*
 * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan
 * instrumentation.  Otherwise, if x2APIC is not available, use of the global
 * lapic_map will generate a KCSan false positive.  While the mapping is
 * shared among all CPUs, the physical access will always take place on the
 * local CPU's APIC, so there isn't in fact a race here.  Furthermore, the
 * KCSan warning printf can cause a panic if issued during LAPIC access,
 * due to attempted recursive use of event timer resources.
 */

/*
 * Read a 32-bit LAPIC register: via MSR in x2APIC mode, otherwise via the
 * uncached MMIO mapping established in lapic_init().
 */
static uint32_t __nosanitizethread
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/*
 * Write a 32-bit LAPIC register.  In x2APIC mode the write goes through
 * WRMSR preceded by mfence+lfence; NOTE(review): presumably the fences
 * order prior memory accesses before the non-serializing APIC MSR write —
 * confirm against the Intel SDM x2APIC ordering rules.
 */
static void __nosanitizethread
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

/* As lapic_write32(), but without the fences on the x2APIC path. */
static void __nosanitizethread
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
/* Read the low half of the Interrupt Command Register. */
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write both ICR halves.  x2APIC mode has a single 64-bit MSR; in xAPIC
 * mode the HI/LO register pair is written with interrupts disabled so the
 * two writes are not separated by a local interrupt handler.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	register_t saveintr;
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		saveintr = intr_disable();
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
		intr_restore(saveintr);
	}
}

/* Write only the low ICR half (destination already programmed/implicit). */
static void
lapic_write_icr_lo(uint32_t vlo)
{

	if (x2apic_mode) {
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
	} else {
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}

/* Send a self-IPI via the dedicated x2APIC SELF_IPI MSR (x2APIC only). */
static void
lapic_write_self_ipi(uint32_t vector)
{

	KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
	wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
}
#endif /* SMP */

/* Turn on x2APIC mode by setting the enable bits in the APIC base MSR. */
static void
lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* Report whether the hardware is currently in (enabled) x2APIC mode. */
bool
lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
	    (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void	lapic_enable(void);
static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
static void	lapic_timer_oneshot(struct lapic *);
static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void	lapic_timer_periodic(struct lapic *);
static void	lapic_timer_deadline(struct lapic *);
static void	lapic_timer_stop(struct lapic *);
static void	lapic_timer_set_divisor(u_int divisor);
static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int	lapic_et_start(struct eventtimer *et,
		    sbintime_t first, sbintime_t period);
static int	lapic_et_stop(struct eventtimer *et);
static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
static void	lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/*
 * Encode a struct lvt into the hardware LVT register format, starting from
 * the current register contents in 'value'.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
{

	/* Clear every field we are about to set, then rebuild from 'lvt'. */
	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/*
		 * NOTE(review): the APIC_LVT_TM clear is inside the
		 * bootverbose test, so level trigger is only forced back to
		 * edge when booting verbose — confirm this is intentional.
		 */
		if (!lvt->lvt_edgetrigger && bootverbose) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value &= ~APIC_LVT_TM;
		}
		/* Use a vector of 0. */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}

/*
 * Encode LVT 'pin', using the per-APIC override if one is active,
 * otherwise the global default from lvts[].
 */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX,
	    ("%s: pin %u out of range", __func__, pin));
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	return (lvt_mode_impl(la, lvt, pin, value));
}

/*
 * Encode AMD extended LVT 'idx'.  ELVTs must already be active and are
 * required to be edge triggered and active high.
 */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local APIC and setup necessary interrupt vectors.
 */
void
lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	int i;
	bool arat;

	TSENTER();

	/*
	 * Enable x2APIC mode if possible.  Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		lapic_enable_x2apic();
		/* MMIO map is unused in x2APIC mode; MSRs are used instead. */
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Register the LAPIC as an event timer unless disabled by hints. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		/* Set if APIC timer runs in C3. */
		arat = (cpu_power_eax & CPUTPM1_ARAT);

		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer stops in deep C-states; degrade quality. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.apic.timer_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
				    "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.apic.eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	1000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		/* iterations/us = (LOOPS * tsc_freq) / (cycles * 1e6) */
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */

	TSEXIT();
}

/*
 * Create a local APIC instance.
 */
void
lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	if (apic_id > max_apic_id) {
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	/* Start from the global defaults with per-APIC overrides inactive. */
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	/* All I/O vectors free except the reserved non-I/O slots below. */
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
	    lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}

/*
 * Return the AMD extended-feature register contents, or 0 on non-AMD/Hygon
 * CPUs or when the extended register space is absent.
 */
static inline uint32_t
amd_read_ext_features(void)
{
	uint32_t version;

	if (cpu_vendor_id != CPU_VENDOR_AMD &&
	    cpu_vendor_id != CPU_VENDOR_HYGON)
		return (0);
	version = lapic_read32(LAPIC_VERSION);
	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
		return (lapic_read32(LAPIC_EXT_FEATURES));
	else
		return (0);
}

/* Number of AMD extended LVT entries, clamped to what we can store. */
static inline uint32_t
amd_read_elvt_count(void)
{
	uint32_t extf;
	uint32_t count;

	extf = amd_read_ext_features();
	count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
	count = min(count, APIC_ELVT_MAX + 1);
	return (count);
}

/*
 * Dump contents of local APIC registers
 */
void
lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID),
	    version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf(" AMD ext features: 0x%08x", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf("%s elvt%d: 0x%08x", (i % 4) ? "" : "\n ", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
		printf("\n");
	}
}

/* Re-enter x2APIC mode (e.g. after a mode switch); no-op in xAPIC mode. */
void
lapic_xapic_mode(void)
{
	register_t saveintr;

	saveintr = intr_disable();
	if (x2apic_mode)
		lapic_enable_x2apic();
	intr_restore(saveintr);
}

/*
 * Program the current CPU's local APIC: TPR, SVR, all LVT/ELVT entries,
 * and (on non-boot calls) restore the previously active timer mode.
 * Runs with interrupts disabled.
 */
void
lapic_setup(int boot)
{
	struct lapic *la;
	uint32_t version;
	uint32_t maxlvt;
	register_t saveintr;
	int elvt_count;
	int i;

	saveintr = intr_disable();

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries. */
	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
	    lapic_read32(LAPIC_LVT_LINT0)));
	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
	    lapic_read32(LAPIC_LVT_LINT1)));

	/*
	 * Program the PMC LVT entry if present.
	 * NOTE(review): the value argument here is the register offset
	 * constant LAPIC_LVT_PCINT, not lapic_read32(LAPIC_LVT_PCINT) as
	 * used for the other entries.  lvt_mode() clears every field it
	 * sets, so only bits outside those masks differ — confirm intent.
	 */
	if (maxlvt >= APIC_LVT_PMC) {
		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
		    LAPIC_LVT_PCINT));
	}

	/*
	 * Program the timer LVT.  Calibration is deferred until it is certain
	 * that we have a reliable timecounter.
	 */
	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
	    lapic_read32(LAPIC_LVT_TIMER));
	la->lvt_timer_last = la->lvt_timer_base;
	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

	if (boot)
		la->la_timer_mode = LAT_MODE_UNDEF;
	else if (la->la_timer_mode != LAT_MODE_UNDEF) {
		/* Restore the timer state active before suspend/resume. */
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		switch (la->la_timer_mode) {
		case LAT_MODE_PERIODIC:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_periodic(la);
			break;
		case LAT_MODE_ONESHOT:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la);
			break;
		case LAT_MODE_DEADLINE:
			lapic_timer_deadline(la);
			break;
		default:
			panic("corrupted la_timer_mode %p %d", la,
			    la->la_timer_mode);
		}
	}

	/* Program error LVT and clear any existing errors. */
	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
	    lapic_read32(LAPIC_LVT_ERROR)));
	lapic_write32(LAPIC_ESR, 0);

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI) {
		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
		    lapic_read32(LAPIC_LVT_CMCI)));
	}

	/* Program any active AMD extended LVT entries. */
	elvt_count = amd_read_elvt_count();
	for (i = 0; i < elvt_count; i++) {
		if (la->la_elvts[i].lvt_active)
			lapic_write32(LAPIC_EXT_LVT0 + i,
			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
	}

	intr_restore(saveintr);
}

/* SYSINIT hook: register a per-CPU "cpuN:timer" interrupt counter. */
static void
lapic_intrcnt(void *dummy __unused)
{
	struct pcpu *pc;
	struct lapic *la;
	char buf[MAXCOMLEN + 1];

	/* If there are no APICs, skip this function. */
	if (lapics == NULL)
		return;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		la = &lapics[pc->pc_apic_id];
		if (!la->la_present)
			continue;

		snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
		intrcnt_add(buf, &la->la_timer_count);
	}
}
SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL);

/* Unmask the local PMC LVT entry (hwpmc interrupt re-arm). */
void
lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t value;

	value = lapic_read32(LAPIC_LVT_PCINT);
	value &= ~APIC_LVT_M;
	lapic_write32(LAPIC_LVT_PCINT, value);
#endif
}

#ifdef HWPMC_HOOKS
/* Rendezvous callback: reprogram this CPU's PMC LVT from the defaults. */
static void
lapic_update_pmc(void *dummy)
{
	struct lapic *la;

	la = &lapics[lapic_id()];
	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
	    lapic_read32(LAPIC_LVT_PCINT)));
}
#endif

/*
 * Measure the LAPIC timer frequency on the current CPU and select a
 * divisor; runs with interrupts disabled.
 */
void
lapic_calibrate_timer(void)
{
	struct lapic *la;
	register_t intr;

#ifdef DEV_ATPIC
	/* Fail if the local APIC is not present.
 */
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif

	intr = intr_disable();
	la = &lapics[lapic_id()];

	lapic_calibrate_initcount(la);

	intr_restore(intr);

	if (lapic_timer_tsc_deadline && bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Unmask the default PMC LVT entry on all CPUs for hwpmc.  Returns 1 on
 * success, 0 if the LAPIC or its PMC LVT entry is unavailable.
 */
int
lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

#ifdef DEV_ATPIC
	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);
#endif

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	lvts[APIC_LVT_PMC].lvt_masked = 0;

	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	return (1);
#else
	return (0);
#endif
}

/* Re-mask the default PMC LVT entry on all CPUs (hwpmc unload). */
void
lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

#ifdef DEV_ATPIC
	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}

/*
 * When running as a guest, try to obtain the bus frequency from the
 * hypervisor's CPUID leaf 0x40000010 instead of measuring it.  Returns
 * true and sets lapic_timer_divisor/count_freq on success.
 */
static int
lapic_calibrate_initcount_cpuid_vm(void)
{
	u_int regs[4];
	uint64_t freq;

	/* Get value from CPUID leaf if possible. */
	if (vm_guest == VM_GUEST_NO)
		return (false);
	if (hv_high < 0x40000010)
		return (false);
	do_cpuid(0x40000010, regs);
	/* EBX reports the APIC bus frequency in kHz. */
	freq = (uint64_t)(regs[1]) * 1000;

	/* Pick timer divisor. */
	lapic_timer_divisor = 2;
	do {
		if (freq / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		return (false);

	/* Record divided frequency. */
	count_freq = freq / lapic_timer_divisor;
	return (count_freq != 0);
}

/* clockcalib() callback: elapsed LAPIC timer ticks since the last arm. */
static uint64_t
cb_lapic_getcount(void)
{

	/* CCR counts down from the initial count. */
	return (APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER));
}

/*
 * Determine the LAPIC timer tick frequency (count_freq) and divisor,
 * preferring the hypervisor CPUID leaf, otherwise measuring against the
 * current timecounter via clockcalib().
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	uint64_t freq;

	if (lapic_calibrate_initcount_cpuid_vm())
		goto done;

	/* Calibrate the APIC timer frequency. */
	lapic_timer_set_divisor(2);
	lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
	/* clockcalib() may use FP; enter a kernel FPU context around it. */
	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
	freq = clockcalib(cb_lapic_getcount, "lapic");
	fpu_kern_leave(curthread, NULL);

	/*
	 * Pick a different divisor if necessary.
	 * freq was measured with divisor 2, so freq * 2 is the undivided rate.
	 */
	lapic_timer_divisor = 2;
	do {
		if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	count_freq = freq * 2 / lapic_timer_divisor;
done:
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, count_freq);
	}
}

/*
 * Switch the eventtimer to 'newmode', updating the advertised frequency
 * and period bounds; no-op when already in that mode.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{
	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		/* TSC-deadline mode counts in TSC units. */
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	/* Bounds in sbintime_t: 2 and 0xfffffffe timer ticks. */
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * eventtimer start method: arm the local timer in periodic, TSC-deadline,
 * or one-shot mode depending on 'period' and hardware support.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		/* Convert sbintime_t to timer ticks (freq * sbt >> 32). */
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* eventtimer stop method: halt the local timer. */
static int
lapic_et_stop(struct
 eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

/* Software-disable the local APIC by clearing the SVR enable bit. */
void
lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

/* Software-enable the local APIC and program the spurious vector. */
static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/*
 * Return the current CPU's APIC ID.  In xAPIC mode the ID lives in the
 * high byte of the ID register and must be shifted down.
 */
int
lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Return nonzero if 'vector' is pending in the local APIC's IRR. */
int
lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
	 */
	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
	return (irr & 1 << (vector % 32));
}

/* Record the logical destination (cluster/ID) for the given APIC. */
void
lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}

/*
 * Set the mask bit of LVT 'pin' either in the global defaults
 * (apic_id == APIC_ID_ALL) or as a per-APIC override.  Takes effect at
 * the next lapic_setup() on the target CPU.
 */
int
lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_masked = masked;
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u %s\n", pin, masked ?
"masked" : "unmasked"); 1140 return (0); 1141 } 1142 1143 int 1144 lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1145 { 1146 struct lvt *lvt; 1147 1148 if (pin > APIC_LVT_MAX) 1149 return (EINVAL); 1150 if (apic_id == APIC_ID_ALL) { 1151 lvt = &lvts[pin]; 1152 if (bootverbose) 1153 printf("lapic:"); 1154 } else { 1155 KASSERT(lapics[apic_id].la_present, 1156 ("%s: missing APIC %u", __func__, apic_id)); 1157 lvt = &lapics[apic_id].la_lvts[pin]; 1158 lvt->lvt_active = 1; 1159 if (bootverbose) 1160 printf("lapic%u:", apic_id); 1161 } 1162 lvt->lvt_mode = mode; 1163 switch (mode) { 1164 case APIC_LVT_DM_NMI: 1165 case APIC_LVT_DM_SMI: 1166 case APIC_LVT_DM_INIT: 1167 case APIC_LVT_DM_EXTINT: 1168 lvt->lvt_edgetrigger = 1; 1169 lvt->lvt_activehi = 1; 1170 if (mode == APIC_LVT_DM_EXTINT) 1171 lvt->lvt_masked = 1; 1172 else 1173 lvt->lvt_masked = 0; 1174 break; 1175 default: 1176 panic("Unsupported delivery mode: 0x%x\n", mode); 1177 } 1178 if (bootverbose) { 1179 printf(" Routing "); 1180 switch (mode) { 1181 case APIC_LVT_DM_NMI: 1182 printf("NMI"); 1183 break; 1184 case APIC_LVT_DM_SMI: 1185 printf("SMI"); 1186 break; 1187 case APIC_LVT_DM_INIT: 1188 printf("INIT"); 1189 break; 1190 case APIC_LVT_DM_EXTINT: 1191 printf("ExtINT"); 1192 break; 1193 } 1194 printf(" -> LINT%u\n", pin); 1195 } 1196 return (0); 1197 } 1198 1199 int 1200 lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1201 { 1202 1203 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1204 return (EINVAL); 1205 if (apic_id == APIC_ID_ALL) { 1206 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1207 if (bootverbose) 1208 printf("lapic:"); 1209 } else { 1210 KASSERT(lapics[apic_id].la_present, 1211 ("%s: missing APIC %u", __func__, apic_id)); 1212 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1213 lapics[apic_id].la_lvts[pin].lvt_activehi = 1214 (pol == INTR_POLARITY_HIGH); 1215 if (bootverbose) 1216 printf("lapic%u:", apic_id); 1217 } 1218 if (bootverbose) 1219 
printf(" LINT%u polarity: %s\n", pin, 1220 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1221 return (0); 1222 } 1223 1224 int 1225 lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1226 enum intr_trigger trigger) 1227 { 1228 1229 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1230 return (EINVAL); 1231 if (apic_id == APIC_ID_ALL) { 1232 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1233 if (bootverbose) 1234 printf("lapic:"); 1235 } else { 1236 KASSERT(lapics[apic_id].la_present, 1237 ("%s: missing APIC %u", __func__, apic_id)); 1238 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1239 (trigger == INTR_TRIGGER_EDGE); 1240 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1241 if (bootverbose) 1242 printf("lapic%u:", apic_id); 1243 } 1244 if (bootverbose) 1245 printf(" LINT%u trigger: %s\n", pin, 1246 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1247 return (0); 1248 } 1249 1250 /* 1251 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1252 * the passed in vector. 
 */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	/* With CHEAP_TPR the vector is written directly into the TPR. */
	lapic_write32(LAPIC_TPR, vector);
#else
	uint32_t tpr;

	/* Otherwise preserve the non-priority bits of the TPR. */
	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic_write32(LAPIC_TPR, tpr);
#endif
}

/* Signal end-of-interrupt to the local APIC. */
void
lapic_eoi(void)
{

	lapic_write32_nofence(LAPIC_EOI, 0);
}

/*
 * Common entry point for I/O interrupt vectors: translate the vector
 * back to its interrupt source and run the registered handlers.
 */
void
lapic_handle_intr(int vector, struct trapframe *frame)
{
	struct intsrc *isrc;

	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
	kmsan_mark(&vector, sizeof(vector), KMSAN_STATE_INITED);
	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
	trap_check_kstack();

	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
	    vector));
	intr_execute_handlers(isrc, frame);
}

/*
 * Interrupt handler for the LAPIC timer: bump this CPU's tick counter
 * and forward the event to the registered eventtimer callback.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
	trap_check_kstack();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	/* la_timer_count points at a per-CPU tick counter — set up
	 * elsewhere; TODO confirm against the timer initialization code. */
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		/*
		 * Publish the trapframe for the duration of the callback so
		 * interrupt-nesting-aware code sees the correct frame.
		 */
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}

/*
 * Program the timer divide configuration register.  'divisor' must be a
 * power of two with a corresponding entry in lapic_timer_divisors[].
 */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
	    ("lapic: invalid divisor %u", divisor));
	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
}

/*
 * Arm the timer in one-shot mode with interrupts enabled, using the
 * previously computed la_timer_period as the initial count.
 */
static void
lapic_timer_oneshot(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_ONE_SHOT;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in one-shot mode with the interrupt masked, counting
 * down 'count' ticks.  Used when the countdown itself is wanted but no
 * interrupt should fire (e.g. calibration — TODO confirm caller intent).
 */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, count);
}

/* Arm the timer in periodic mode with period la_timer_period. */
static void
lapic_timer_periodic(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_PERIODIC;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in TSC-deadline mode.  The LVT register is only
 * rewritten when the cached value changed; in xAPIC mode an mfence
 * orders the MMIO LVT write before the MSR_TSC_DEADLINE write.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}

/*
 * Stop the timer: clear the TSC deadline in deadline mode, otherwise
 * mask the timer LVT entry.
 */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}

/*
 * Interrupt handler for the corrected machine check interrupt (CMCI):
 * acknowledge the interrupt and hand off to the MCA code.
 */
void
lapic_handle_cmc(void)
{
	trap_check_kstack();

	lapic_eoi();
	cmc_intr();
}

/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
void
lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	/* The local APIC may not be present/mapped when atpic is in use. */
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
}

/*
 * Enable the AMD extended LVT entry used for MCE thresholding on the
 * current CPU.  Returns the ELVT index (APIC_ELVT_MCA) on success or -1
 * if the CPU does not expose enough extended LVT entries.  If the entry
 * is already unmasked (e.g. by firmware) it is left untouched.
 */
int
lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	return (APIC_ELVT_MCA);
}

/* Interrupt handler for the local APIC error interrupt. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	trap_check_kstack();

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}

/* Map an APIC ID to the corresponding FreeBSD cpuid. */
u_int
apic_cpuid(u_int apic_id)
{
#ifdef SMP
	return apic_cpuids[apic_id];
#else
	return 0;
#endif
}

/* Request a free IDT vector to be used by the specified IRQ. */
u_int
apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	/* 0 indicates allocation failure to the caller. */
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
u_int
apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			/* Alignment is relative to the real IDT vector. */
			if (((vector + APIC_IO_INTS) & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/* Found a run, assign IRQs and return the first vector. */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}

/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
void
apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/* PTI kernels use the trampoline variants of the handlers. */
	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}

/*
 * Nominally disable a vector; currently a no-op (see the comment in the
 * #ifdef notyet block below).
 */
void
apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}

/* Release an APIC vector when it's no longer in use. */
void
apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/* Negative slot markers (non-IRQ entries) are reported as IRQ 0. */
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 */
DB_SHOW_COMMAND_FLAGS(apic, db_show_apic, DB_CMD_MEMSAFE)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "v"/"vv" modifiers select increasingly verbose output. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			/* Skip slots reserved for non-I/O interrupts. */
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}

/*
 * Print the set bits of a 32-bit mask as two-digit hex vector numbers
 * offset by 'base', prefixed with 'prefix'.  Prints nothing when the
 * mask is empty.
 */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		if (v & (1 << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}

/* Show info from the lapic regs for this CPU.
 */
DB_SHOW_COMMAND_FLAGS(lapic, db_show_lapic, DB_CMD_MEMSAFE)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR));

/* Dump one 32-vector window of the ISR/TMR/IRR register arrays. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
1816 */ 1817 1818 static SLIST_HEAD(, apic_enumerator) enumerators = 1819 SLIST_HEAD_INITIALIZER(enumerators); 1820 static struct apic_enumerator *best_enum; 1821 1822 void 1823 apic_register_enumerator(struct apic_enumerator *enumerator) 1824 { 1825 #ifdef INVARIANTS 1826 struct apic_enumerator *apic_enum; 1827 1828 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1829 if (apic_enum == enumerator) 1830 panic("%s: Duplicate register of %s", __func__, 1831 enumerator->apic_name); 1832 } 1833 #endif 1834 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1835 } 1836 1837 /* 1838 * We have to look for CPU's very, very early because certain subsystems 1839 * want to know how many CPU's we have extremely early on in the boot 1840 * process. 1841 */ 1842 static void 1843 apic_init(void *dummy __unused) 1844 { 1845 struct apic_enumerator *enumerator; 1846 int retval, best; 1847 1848 /* We only support built in local APICs. */ 1849 if (!(cpu_feature & CPUID_APIC)) 1850 return; 1851 1852 /* Don't probe if APIC mode is disabled. */ 1853 if (resource_disabled("apic", 0)) 1854 return; 1855 1856 /* Probe all the enumerators to find the best match. */ 1857 best_enum = NULL; 1858 best = 0; 1859 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1860 retval = enumerator->apic_probe(); 1861 if (retval > 0) 1862 continue; 1863 if (best_enum == NULL || best < retval) { 1864 best_enum = enumerator; 1865 best = retval; 1866 } 1867 } 1868 if (best_enum == NULL) { 1869 if (bootverbose) 1870 printf("APIC: Could not find any APICs.\n"); 1871 #ifndef DEV_ATPIC 1872 panic("running without device atpic requires a local APIC"); 1873 #endif 1874 return; 1875 } 1876 1877 if (bootverbose) 1878 printf("APIC: Using the %s enumerator.\n", 1879 best_enum->apic_name); 1880 1881 #ifdef I686_CPU 1882 /* 1883 * To work around an errata, we disable the local APIC on some 1884 * CPUs during early startup. We need to turn the local APIC back 1885 * on on such CPUs now. 
1886 */ 1887 ppro_reenable_apic(); 1888 #endif 1889 1890 /* Probe the CPU's in the system. */ 1891 retval = best_enum->apic_probe_cpus(); 1892 if (retval != 0) 1893 printf("%s: Failed to probe CPUs: returned %d\n", 1894 best_enum->apic_name, retval); 1895 1896 } 1897 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1898 1899 /* 1900 * Setup the local APIC. We have to do this prior to starting up the APs 1901 * in the SMP case. 1902 */ 1903 static void 1904 apic_setup_local(void *dummy __unused) 1905 { 1906 int retval; 1907 1908 if (best_enum == NULL) 1909 return; 1910 1911 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1912 M_WAITOK | M_ZERO); 1913 1914 /* Initialize the local APIC. */ 1915 retval = best_enum->apic_setup_local(); 1916 if (retval != 0) 1917 printf("%s: Failed to setup the local APIC: returned %d\n", 1918 best_enum->apic_name, retval); 1919 } 1920 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1921 1922 /* 1923 * Setup the I/O APICs. 1924 */ 1925 static void 1926 apic_setup_io(void *dummy __unused) 1927 { 1928 int retval; 1929 1930 if (best_enum == NULL) 1931 return; 1932 1933 /* 1934 * Local APIC must be registered before other PICs and pseudo PICs 1935 * for proper suspend/resume order. 1936 */ 1937 intr_register_pic(&lapic_pic); 1938 1939 retval = best_enum->apic_setup_io(); 1940 if (retval != 0) 1941 printf("%s: Failed to setup I/O APICs: returned %d\n", 1942 best_enum->apic_name, retval); 1943 1944 /* 1945 * Finish setting up the local APIC on the BSP once we know 1946 * how to properly program the LINT pins. In particular, this 1947 * enables the EOI suppression mode, if LAPIC supports it and 1948 * user did not disable the mode. 1949 */ 1950 lapic_setup(1); 1951 if (bootverbose) 1952 lapic_dump("BSP"); 1953 1954 /* Enable the MSI "pic". 
 */
	msi_init();

#ifdef XENHVM
	xen_intr_alloc_irqs();
#endif
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);

#ifdef SMP
/*
 * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
 * private to the MD code.  The public interface for the rest of the
 * kernel is defined in mp_machdep.c.
 */

/*
 * Wait delay microseconds for IPI to be sent.  If delay is -1, we
 * wait forever.  Returns 1 once the ICR reports idle (or immediately in
 * x2APIC mode, where delivery status is not architecturally defined),
 * 0 on timeout.
 */
int
lapic_ipi_wait(int delay)
{
	uint64_t rx;

	/* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
	if (x2apic_mode)
		return (1);

	for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
		if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
		    APIC_DELSTAT_IDLE)
			return (1);
		ia32_pause();
	}
	return (0);
}

/*
 * Issue an IPI by writing the ICR directly.  When the low word selects
 * an explicit destination, the destination ID is placed in the high
 * half (shifted in xAPIC mode); otherwise only the low word is written.
 */
void
lapic_ipi_raw(register_t icrlo, u_int dest)
{
	uint32_t icrhi;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		if (x2apic_mode)
			icrhi = dest;
		else
			icrhi = dest << APIC_ID_SHIFT;
		lapic_write_icr(icrhi, icrlo);
	} else {
		lapic_write_icr_lo(icrlo);
	}
}

#ifdef DETECT_DEADLOCK
#define AFTER_SPIN 50
#endif

/*
 * Send vector 'vector' as an IPI to 'dest', which is either a shorthand
 * (SELF/ALL/OTHERS) or an explicit APIC ID.  Vectors at or above
 * IPI_NMI_FIRST are pseudo-vectors delivered as NMIs instead.
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		/* x2APIC has a dedicated fast path for self-IPIs. */
		if (x2apic_mode && vector < IPI_NMI_FIRST) {
			lapic_write_self_ipi(vector);
			return;
		}
		icrlo = APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo = APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo = APIC_DEST_ALLESELF;
		break;
	default:
		icrlo = 0;
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}

void (*ipi_vectored)(u_int, int) = &native_lapic_ipi_vectored;
#endif /* SMP */

/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region.  In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written.  Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */
int
lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	/*
	 * Scan the dynamic IPI range for an IDT slot still pointing at
	 * the reserved (unused) handler; claim the first one found.
	 * Returns -1 when no slot is free.
	 */
	vector = -1;
	mtx_lock_spin(&icu_lock);
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		ip = &idt[idx];
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
		/* i386 stores displaced handler addresses in the IDT. */
		func -= setidt_disp;
#endif
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	return (vector);
}

/*
 * Return a dynamically allocated IPI vector to the reserved state.
 * The slot must currently hold a non-reserved handler.
 */
void
lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func __diagused;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
	func -= setidt_disp;
#endif
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}