1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
34 */ 35 36 #include <sys/cdefs.h> 37 #include "opt_atpic.h" 38 #include "opt_hwpmc_hooks.h" 39 40 #include "opt_ddb.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/asan.h> 45 #include <sys/bus.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/msan.h> 50 #include <sys/mutex.h> 51 #include <sys/pcpu.h> 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/timeet.h> 57 #include <sys/timetc.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <x86/apicreg.h> 63 #include <machine/clock.h> 64 #include <machine/cpufunc.h> 65 #include <machine/cputypes.h> 66 #include <machine/fpu.h> 67 #include <machine/frame.h> 68 #include <machine/intr_machdep.h> 69 #include <x86/apicvar.h> 70 #include <x86/mca.h> 71 #include <machine/md_var.h> 72 #include <machine/smp.h> 73 #include <machine/specialreg.h> 74 #include <x86/init.h> 75 76 #ifdef DDB 77 #include <sys/interrupt.h> 78 #include <ddb/ddb.h> 79 #endif 80 81 #ifdef __amd64__ 82 #define SDT_APIC SDT_SYSIGT 83 #define GSEL_APIC 0 84 #else 85 #define SDT_APIC SDT_SYS386IGT 86 #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 87 #endif 88 89 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); 90 91 /* Sanity checks on IDT vectors. */ 92 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 93 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 94 CTASSERT(APIC_LOCAL_INTS == 240); 95 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 96 97 /* 98 * I/O interrupts use non-negative IRQ values. These values are used 99 * to mark unused IDT entries or IDT entries reserved for a non-I/O 100 * interrupt. 
 */
#define	IRQ_FREE	-1	/* IDT entry is unused */
#define	IRQ_TIMER	-2	/* reserved for the local APIC timer */
#define	IRQ_SYSCALL	-3	/* reserved for IDT_SYSCALL */
#define	IRQ_DTRACE_RET	-4	/* reserved for the DTrace return probe */
#define	IRQ_EVTCHN	-5	/* reserved for Xen event channel upcalls */

/*
 * Mode the per-CPU local APIC timer was last programmed in.  Recorded so
 * that lapic_setup() can re-program the timer identically after a
 * suspend/resume cycle (lapic_resume() -> lapic_setup(0)).
 */
enum lat_timer_mode {
	LAT_MODE_UNDEF = 0,	/* timer not configured yet */
	LAT_MODE_PERIODIC = 1,	/* periodic interrupt stream */
	LAT_MODE_ONESHOT = 2,	/* one interrupt after the initial count */
	LAT_MODE_DEADLINE = 3,	/* TSC-deadline mode */
};

/*
 * Support for local APICs.  Local APICs manage interrupts on each
 * individual processor as opposed to I/O APICs which receive interrupts
 * from I/O devices and then forward them on to the local APICs.
 *
 * Local APICs can also send interrupts to each other thus providing the
 * mechanism for IPIs.
 */

/*
 * Software description of one LVT (Local Vector Table) entry.  The
 * fields are folded into a hardware LVT register by lvt_mode_impl().
 */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 0 sets APIC_LVT_TM (level) */
	u_int lvt_activehi:1;		/* 0 sets APIC_LVT_IIPP_INTALO */
	u_int lvt_masked:1;		/* 1 sets APIC_LVT_M */
	u_int lvt_active:1;		/* per-CPU override is in effect */
	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode */
	u_int lvt_vector:8;		/* vector, used for DM_FIXED only */
};

/* Per-CPU local APIC state, indexed by APIC ID in lapics[]. */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];	/* per-CPU LVT overrides */
	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* AMD extended LVTs */
	u_int la_id:8;			/* this CPU's APIC ID */
	u_int la_cluster:4;		/* logical cluster */
	u_int la_cluster_id:2;		/* intra-cluster ID */
	u_int la_present:1;		/* entry describes a real APIC */
	u_long *la_timer_count;		/* intrcnt slot for the timer */
	uint64_t la_timer_period;	/* last programmed count/period */
	enum lat_timer_mode la_timer_mode;
	uint32_t lvt_timer_base;	/* timer LVT without mode bits */
	uint32_t lvt_timer_last;	/* last value written to timer LVT */
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];	/* vector -> IRQ (or IRQ_*) */
} static *lapics;

/* Global defaults for local APIC LVT entries. */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};

/*
 * Global defaults for AMD local APIC ELVT entries.  All start with
 * lvt_active clear; lapic_setup() only programs the ones a consumer
 * has activated.
 */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/* I/O interrupt entry points, one per block of 32 vectors. */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};

/* As above, but the variants used when PTI is enabled. */
static inthand_t *ioint_pti_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
};

/* Timer divide-configuration values for divisors 1 through 128. */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

/* Mapping of the LAPIC register page in xAPIC mode; NULL in x2APIC mode. */
volatile char *lapic_map;
vm_paddr_t lapic_paddr = DEFAULT_APIC_BASE;
int x2apic_mode;			/* nonzero when running in x2APIC mode */
int lapic_eoi_suppression;		/* broadcast-EOI suppression enabled */
static int lapic_timer_tsc_deadline;	/* TSC-deadline timer mode in use */
/* Timer divisor and resulting count frequency from calibration. */
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;	/* the "LAPIC" event timer */
#ifdef SMP
/* Busy-loop iterations per microsecond for IPI ack waits (xAPIC only). */
static uint64_t lapic_ipi_wait_mult;
static int __read_mostly lapic_ds_idle_timeout = 1000000;
#endif
unsigned int max_apic_id;		/* highest valid index into lapics[] */

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");
217 #ifdef SMP 218 SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN, 219 &lapic_ds_idle_timeout, 0, 220 "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)"); 221 #endif 222 223 static void lapic_calibrate_initcount(struct lapic *la); 224 225 /* 226 * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan 227 * instrumentation. Otherwise, if x2APIC is not available, use of the global 228 * lapic_map will generate a KCSan false positive. While the mapping is 229 * shared among all CPUs, the physical access will always take place on the 230 * local CPU's APIC, so there isn't in fact a race here. Furthermore, the 231 * KCSan warning printf can cause a panic if issued during LAPIC access, 232 * due to attempted recursive use of event timer resources. 233 */ 234 235 static uint32_t __nosanitizethread 236 lapic_read32(enum LAPIC_REGISTERS reg) 237 { 238 uint32_t res; 239 240 if (x2apic_mode) { 241 res = rdmsr32(MSR_APIC_000 + reg); 242 } else { 243 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); 244 } 245 return (res); 246 } 247 248 static void __nosanitizethread 249 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) 250 { 251 252 if (x2apic_mode) { 253 mfence(); 254 lfence(); 255 wrmsr(MSR_APIC_000 + reg, val); 256 } else { 257 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 258 } 259 } 260 261 static void __nosanitizethread 262 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) 263 { 264 265 if (x2apic_mode) { 266 wrmsr(MSR_APIC_000 + reg, val); 267 } else { 268 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 269 } 270 } 271 272 #ifdef SMP 273 static uint64_t 274 lapic_read_icr_lo(void) 275 { 276 277 return (lapic_read32(LAPIC_ICR_LO)); 278 } 279 280 static void 281 lapic_write_icr(uint32_t vhi, uint32_t vlo) 282 { 283 register_t saveintr; 284 uint64_t v; 285 286 if (x2apic_mode) { 287 v = ((uint64_t)vhi << 32) | vlo; 288 mfence(); 289 wrmsr(MSR_APIC_000 + 
LAPIC_ICR_LO, v); 290 } else { 291 saveintr = intr_disable(); 292 lapic_write32(LAPIC_ICR_HI, vhi); 293 lapic_write32(LAPIC_ICR_LO, vlo); 294 intr_restore(saveintr); 295 } 296 } 297 298 static void 299 lapic_write_icr_lo(uint32_t vlo) 300 { 301 302 if (x2apic_mode) { 303 mfence(); 304 wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo); 305 } else { 306 lapic_write32(LAPIC_ICR_LO, vlo); 307 } 308 } 309 310 static void 311 lapic_write_self_ipi(uint32_t vector) 312 { 313 314 KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode")); 315 wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector); 316 } 317 #endif /* SMP */ 318 319 static void 320 lapic_enable_x2apic(void) 321 { 322 uint64_t apic_base; 323 324 apic_base = rdmsr(MSR_APICBASE); 325 apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; 326 wrmsr(MSR_APICBASE, apic_base); 327 } 328 329 bool 330 lapic_is_x2apic(void) 331 { 332 uint64_t apic_base; 333 334 apic_base = rdmsr(MSR_APICBASE); 335 return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == 336 (APICBASE_X2APIC | APICBASE_ENABLED)); 337 } 338 339 static void lapic_enable(void); 340 static void lapic_resume(struct pic *pic, bool suspend_cancelled); 341 static void lapic_timer_oneshot(struct lapic *); 342 static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); 343 static void lapic_timer_periodic(struct lapic *); 344 static void lapic_timer_deadline(struct lapic *); 345 static void lapic_timer_stop(struct lapic *); 346 static void lapic_timer_set_divisor(u_int divisor); 347 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); 348 static int lapic_et_start(struct eventtimer *et, 349 sbintime_t first, sbintime_t period); 350 static int lapic_et_stop(struct eventtimer *et); 351 static u_int apic_idt_to_irq(u_int apic_id, u_int vector); 352 static void lapic_set_tpr(u_int vector); 353 354 struct pic lapic_pic = { .pic_resume = lapic_resume }; 355 356 static uint32_t 357 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) 358 { 359 360 
value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | 361 APIC_LVT_VECTOR); 362 if (lvt->lvt_edgetrigger == 0) 363 value |= APIC_LVT_TM; 364 if (lvt->lvt_activehi == 0) 365 value |= APIC_LVT_IIPP_INTALO; 366 if (lvt->lvt_masked) 367 value |= APIC_LVT_M; 368 value |= lvt->lvt_mode; 369 switch (lvt->lvt_mode) { 370 case APIC_LVT_DM_NMI: 371 case APIC_LVT_DM_SMI: 372 case APIC_LVT_DM_INIT: 373 case APIC_LVT_DM_EXTINT: 374 if (!lvt->lvt_edgetrigger && bootverbose) { 375 printf("lapic%u: Forcing LINT%u to edge trigger\n", 376 la->la_id, pin); 377 value &= ~APIC_LVT_TM; 378 } 379 /* Use a vector of 0. */ 380 break; 381 case APIC_LVT_DM_FIXED: 382 value |= lvt->lvt_vector; 383 break; 384 default: 385 panic("bad APIC LVT delivery mode: %#x\n", value); 386 } 387 return (value); 388 } 389 390 static uint32_t 391 lvt_mode(struct lapic *la, u_int pin, uint32_t value) 392 { 393 struct lvt *lvt; 394 395 KASSERT(pin <= APIC_LVT_MAX, 396 ("%s: pin %u out of range", __func__, pin)); 397 if (la->la_lvts[pin].lvt_active) 398 lvt = &la->la_lvts[pin]; 399 else 400 lvt = &lvts[pin]; 401 402 return (lvt_mode_impl(la, lvt, pin, value)); 403 } 404 405 static uint32_t 406 elvt_mode(struct lapic *la, u_int idx, uint32_t value) 407 { 408 struct lvt *elvt; 409 410 KASSERT(idx <= APIC_ELVT_MAX, 411 ("%s: idx %u out of range", __func__, idx)); 412 413 elvt = &la->la_elvts[idx]; 414 KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); 415 KASSERT(elvt->lvt_edgetrigger, 416 ("%s: ELVT%u is not edge triggered", __func__, idx)); 417 KASSERT(elvt->lvt_activehi, 418 ("%s: ELVT%u is not active high", __func__, idx)); 419 return (lvt_mode_impl(la, elvt, idx, value)); 420 } 421 422 /* 423 * Map the local APIC and setup necessary interrupt vectors. 424 */ 425 void 426 lapic_init(vm_paddr_t addr) 427 { 428 #ifdef SMP 429 uint64_t r, r1, r2, rx; 430 #endif 431 uint32_t ver; 432 int i; 433 bool arat; 434 435 TSENTER(); 436 437 /* 438 * Enable x2APIC mode if possible. 
Map the local APIC 439 * registers page. 440 * 441 * Keep the LAPIC registers page mapped uncached for x2APIC 442 * mode too, to have direct map page attribute set to 443 * uncached. This is needed to work around CPU errata present 444 * on all Intel processors. 445 */ 446 KASSERT(trunc_page(addr) == addr, 447 ("local APIC not aligned on a page boundary")); 448 lapic_paddr = addr; 449 lapic_map = pmap_mapdev(addr, PAGE_SIZE); 450 if (x2apic_mode) { 451 lapic_enable_x2apic(); 452 lapic_map = NULL; 453 } 454 455 /* Setup the spurious interrupt handler. */ 456 setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, 457 GSEL_APIC); 458 459 /* Perform basic initialization of the BSP's local APIC. */ 460 lapic_enable(); 461 462 /* Set BSP's per-CPU local APIC ID. */ 463 PCPU_SET(apic_id, lapic_id()); 464 465 /* Local APIC timer interrupt. */ 466 setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), 467 SDT_APIC, SEL_KPL, GSEL_APIC); 468 469 /* Local APIC error interrupt. */ 470 setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), 471 SDT_APIC, SEL_KPL, GSEL_APIC); 472 473 /* XXX: Thermal interrupt */ 474 475 /* Local APIC CMCI. */ 476 setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), 477 SDT_APIC, SEL_KPL, GSEL_APIC); 478 479 if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { 480 /* Set if APIC timer runs in C3. 
*/ 481 arat = (cpu_power_eax & CPUTPM1_ARAT); 482 483 bzero(&lapic_et, sizeof(lapic_et)); 484 lapic_et.et_name = "LAPIC"; 485 lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | 486 ET_FLAGS_PERCPU; 487 lapic_et.et_quality = 600; 488 if (!arat) { 489 lapic_et.et_flags |= ET_FLAGS_C3STOP; 490 lapic_et.et_quality = 100; 491 } 492 if ((cpu_feature & CPUID_TSC) != 0 && 493 (cpu_feature2 & CPUID2_TSCDLT) != 0 && 494 tsc_is_invariant && tsc_freq != 0) { 495 lapic_timer_tsc_deadline = 1; 496 TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", 497 &lapic_timer_tsc_deadline); 498 } 499 500 lapic_et.et_frequency = 0; 501 /* We don't know frequency yet, so trying to guess. */ 502 lapic_et.et_min_period = 0x00001000LL; 503 lapic_et.et_max_period = SBT_1S; 504 lapic_et.et_start = lapic_et_start; 505 lapic_et.et_stop = lapic_et_stop; 506 lapic_et.et_priv = NULL; 507 et_register(&lapic_et); 508 } 509 510 /* 511 * Set lapic_eoi_suppression after lapic_enable(), to not 512 * enable suppression in the hardware prematurely. Note that 513 * we by default enable suppression even when system only has 514 * one IO-APIC, since EOI is broadcasted to all APIC agents, 515 * including CPUs, otherwise. 516 * 517 * It seems that at least some KVM versions report 518 * EOI_SUPPRESSION bit, but auto-EOI does not work. 519 */ 520 ver = lapic_read32(LAPIC_VERSION); 521 if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { 522 lapic_eoi_suppression = 1; 523 if (vm_guest == VM_GUEST_KVM) { 524 if (bootverbose) 525 printf( 526 "KVM -- disabling lapic eoi suppression\n"); 527 lapic_eoi_suppression = 0; 528 } 529 TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", 530 &lapic_eoi_suppression); 531 } 532 533 #ifdef SMP 534 #define LOOPS 1000 535 /* 536 * Calibrate the busy loop waiting for IPI ack in xAPIC mode. 537 * lapic_ipi_wait_mult contains the number of iterations which 538 * approximately delay execution for 1 microsecond (the 539 * argument to lapic_ipi_wait() is in microseconds). 
540 * 541 * We assume that TSC is present and already measured. 542 * Possible TSC frequency jumps are irrelevant to the 543 * calibration loop below, the CPU clock management code is 544 * not yet started, and we do not enter sleep states. 545 */ 546 KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, 547 ("TSC not initialized")); 548 if (!x2apic_mode) { 549 r = rdtsc(); 550 for (rx = 0; rx < LOOPS; rx++) { 551 (void)lapic_read_icr_lo(); 552 ia32_pause(); 553 } 554 r = rdtsc() - r; 555 r1 = tsc_freq * LOOPS; 556 r2 = r * 1000000; 557 lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; 558 if (bootverbose) { 559 printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " 560 "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, 561 (uintmax_t)r, (uintmax_t)tsc_freq); 562 } 563 } 564 #undef LOOPS 565 #endif /* SMP */ 566 567 TSEXIT(); 568 } 569 570 /* 571 * Create a local APIC instance. 572 */ 573 void 574 lapic_create(u_int apic_id, int boot_cpu) 575 { 576 int i; 577 578 if (apic_id > max_apic_id) { 579 printf("APIC: Ignoring local APIC with ID %d\n", apic_id); 580 if (boot_cpu) 581 panic("Can't ignore BSP"); 582 return; 583 } 584 KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", 585 apic_id)); 586 587 /* 588 * Assume no local LVT overrides and a cluster of 0 and 589 * intra-cluster ID of 0. 
590 */ 591 lapics[apic_id].la_present = 1; 592 lapics[apic_id].la_id = apic_id; 593 for (i = 0; i <= APIC_LVT_MAX; i++) { 594 lapics[apic_id].la_lvts[i] = lvts[i]; 595 lapics[apic_id].la_lvts[i].lvt_active = 0; 596 } 597 for (i = 0; i <= APIC_ELVT_MAX; i++) { 598 lapics[apic_id].la_elvts[i] = elvts[i]; 599 lapics[apic_id].la_elvts[i].lvt_active = 0; 600 } 601 for (i = 0; i <= APIC_NUM_IOINTS; i++) 602 lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; 603 lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; 604 lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = 605 IRQ_TIMER; 606 #ifdef KDTRACE_HOOKS 607 lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = 608 IRQ_DTRACE_RET; 609 #endif 610 #ifdef XENHVM 611 lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; 612 #endif 613 614 #ifdef SMP 615 cpu_add(apic_id, boot_cpu); 616 #endif 617 } 618 619 static inline uint32_t 620 amd_read_ext_features(void) 621 { 622 uint32_t version; 623 624 if (cpu_vendor_id != CPU_VENDOR_AMD && 625 cpu_vendor_id != CPU_VENDOR_HYGON) 626 return (0); 627 version = lapic_read32(LAPIC_VERSION); 628 if ((version & APIC_VER_AMD_EXT_SPACE) != 0) 629 return (lapic_read32(LAPIC_EXT_FEATURES)); 630 else 631 return (0); 632 } 633 634 static inline uint32_t 635 amd_read_elvt_count(void) 636 { 637 uint32_t extf; 638 uint32_t count; 639 640 extf = amd_read_ext_features(); 641 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 642 count = min(count, APIC_ELVT_MAX + 1); 643 return (count); 644 } 645 646 /* 647 * Dump contents of local APIC registers 648 */ 649 void 650 lapic_dump(const char* str) 651 { 652 uint32_t version; 653 uint32_t maxlvt; 654 uint32_t extf; 655 int elvt_count; 656 int i; 657 658 version = lapic_read32(LAPIC_VERSION); 659 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 660 printf("cpu%d %s:\n", PCPU_GET(cpuid), str); 661 printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", 662 lapic_read32(LAPIC_ID), 
version, 663 lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); 664 if ((cpu_feature2 & CPUID2_X2APIC) != 0) 665 printf(" x2APIC: %d", x2apic_mode); 666 printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 667 lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), 668 lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); 669 printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", 670 lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), 671 lapic_read32(LAPIC_LVT_ERROR)); 672 if (maxlvt >= APIC_LVT_PMC) 673 printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); 674 printf("\n"); 675 if (maxlvt >= APIC_LVT_CMCI) 676 printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); 677 extf = amd_read_ext_features(); 678 if (extf != 0) { 679 printf(" AMD ext features: 0x%08x", extf); 680 elvt_count = amd_read_elvt_count(); 681 for (i = 0; i < elvt_count; i++) 682 printf("%s elvt%d: 0x%08x", (i % 4) ? "" : "\n ", i, 683 lapic_read32(LAPIC_EXT_LVT0 + i)); 684 printf("\n"); 685 } 686 } 687 688 void 689 lapic_xapic_mode(void) 690 { 691 register_t saveintr; 692 693 saveintr = intr_disable(); 694 if (x2apic_mode) 695 lapic_enable_x2apic(); 696 intr_restore(saveintr); 697 } 698 699 void 700 lapic_setup(int boot) 701 { 702 struct lapic *la; 703 uint32_t version; 704 uint32_t maxlvt; 705 register_t saveintr; 706 int elvt_count; 707 int i; 708 709 saveintr = intr_disable(); 710 711 la = &lapics[lapic_id()]; 712 KASSERT(la->la_present, ("missing APIC structure")); 713 version = lapic_read32(LAPIC_VERSION); 714 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 715 716 /* Initialize the TPR to allow all interrupts. */ 717 lapic_set_tpr(0); 718 719 /* Setup spurious vector and enable the local APIC. */ 720 lapic_enable(); 721 722 /* Program LINT[01] LVT entries. 
*/ 723 lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, 724 lapic_read32(LAPIC_LVT_LINT0))); 725 lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, 726 lapic_read32(LAPIC_LVT_LINT1))); 727 728 /* Program the PMC LVT entry if present. */ 729 if (maxlvt >= APIC_LVT_PMC) { 730 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 731 LAPIC_LVT_PCINT)); 732 } 733 734 /* 735 * Program the timer LVT. Calibration is deferred until it is certain 736 * that we have a reliable timecounter. 737 */ 738 la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, 739 lapic_read32(LAPIC_LVT_TIMER)); 740 la->lvt_timer_last = la->lvt_timer_base; 741 lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); 742 743 if (boot) 744 la->la_timer_mode = LAT_MODE_UNDEF; 745 else if (la->la_timer_mode != LAT_MODE_UNDEF) { 746 KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", 747 lapic_id())); 748 switch (la->la_timer_mode) { 749 case LAT_MODE_PERIODIC: 750 lapic_timer_set_divisor(lapic_timer_divisor); 751 lapic_timer_periodic(la); 752 break; 753 case LAT_MODE_ONESHOT: 754 lapic_timer_set_divisor(lapic_timer_divisor); 755 lapic_timer_oneshot(la); 756 break; 757 case LAT_MODE_DEADLINE: 758 lapic_timer_deadline(la); 759 break; 760 default: 761 panic("corrupted la_timer_mode %p %d", la, 762 la->la_timer_mode); 763 } 764 } 765 766 /* Program error LVT and clear any existing errors. */ 767 lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, 768 lapic_read32(LAPIC_LVT_ERROR))); 769 lapic_write32(LAPIC_ESR, 0); 770 771 /* XXX: Thermal LVT */ 772 773 /* Program the CMCI LVT entry if present. 
*/ 774 if (maxlvt >= APIC_LVT_CMCI) { 775 lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, 776 lapic_read32(LAPIC_LVT_CMCI))); 777 } 778 779 elvt_count = amd_read_elvt_count(); 780 for (i = 0; i < elvt_count; i++) { 781 if (la->la_elvts[i].lvt_active) 782 lapic_write32(LAPIC_EXT_LVT0 + i, 783 elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); 784 } 785 786 intr_restore(saveintr); 787 } 788 789 static void 790 lapic_intrcnt(void *dummy __unused) 791 { 792 struct pcpu *pc; 793 struct lapic *la; 794 char buf[MAXCOMLEN + 1]; 795 796 /* If there are no APICs, skip this function. */ 797 if (lapics == NULL) 798 return; 799 800 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 801 la = &lapics[pc->pc_apic_id]; 802 if (!la->la_present) 803 continue; 804 805 snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); 806 intrcnt_add(buf, &la->la_timer_count); 807 } 808 } 809 SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL); 810 811 void 812 lapic_reenable_pmc(void) 813 { 814 #ifdef HWPMC_HOOKS 815 uint32_t value; 816 817 value = lapic_read32(LAPIC_LVT_PCINT); 818 value &= ~APIC_LVT_M; 819 lapic_write32(LAPIC_LVT_PCINT, value); 820 #endif 821 } 822 823 #ifdef HWPMC_HOOKS 824 static void 825 lapic_update_pmc(void *dummy) 826 { 827 struct lapic *la; 828 829 la = &lapics[lapic_id()]; 830 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 831 lapic_read32(LAPIC_LVT_PCINT))); 832 } 833 #endif 834 835 void 836 lapic_calibrate_timer(void) 837 { 838 struct lapic *la; 839 register_t intr; 840 841 #ifdef DEV_ATPIC 842 /* Fail if the local APIC is not present. 
*/ 843 if (!x2apic_mode && lapic_map == NULL) 844 return; 845 #endif 846 847 intr = intr_disable(); 848 la = &lapics[lapic_id()]; 849 850 lapic_calibrate_initcount(la); 851 852 intr_restore(intr); 853 854 if (lapic_timer_tsc_deadline && bootverbose) { 855 printf("lapic: deadline tsc mode, Frequency %ju Hz\n", 856 (uintmax_t)tsc_freq); 857 } 858 } 859 860 int 861 lapic_enable_pmc(void) 862 { 863 #ifdef HWPMC_HOOKS 864 u_int32_t maxlvt; 865 866 #ifdef DEV_ATPIC 867 /* Fail if the local APIC is not present. */ 868 if (!x2apic_mode && lapic_map == NULL) 869 return (0); 870 #endif 871 872 /* Fail if the PMC LVT is not present. */ 873 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 874 if (maxlvt < APIC_LVT_PMC) 875 return (0); 876 877 lvts[APIC_LVT_PMC].lvt_masked = 0; 878 879 #ifdef EARLY_AP_STARTUP 880 MPASS(mp_ncpus == 1 || smp_started); 881 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 882 #else 883 #ifdef SMP 884 /* 885 * If hwpmc was loaded at boot time then the APs may not be 886 * started yet. In that case, don't forward the request to 887 * them as they will program the lvt when they start. 888 */ 889 if (smp_started) 890 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 891 else 892 #endif 893 lapic_update_pmc(NULL); 894 #endif 895 return (1); 896 #else 897 return (0); 898 #endif 899 } 900 901 void 902 lapic_disable_pmc(void) 903 { 904 #ifdef HWPMC_HOOKS 905 u_int32_t maxlvt; 906 907 #ifdef DEV_ATPIC 908 /* Fail if the local APIC is not present. */ 909 if (!x2apic_mode && lapic_map == NULL) 910 return; 911 #endif 912 913 /* Fail if the PMC LVT is not present. */ 914 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 915 if (maxlvt < APIC_LVT_PMC) 916 return; 917 918 lvts[APIC_LVT_PMC].lvt_masked = 1; 919 920 #ifdef SMP 921 /* The APs should always be started when hwpmc is unloaded. 
*/ 922 KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); 923 #endif 924 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 925 #endif 926 } 927 928 static int 929 lapic_calibrate_initcount_cpuid_vm(void) 930 { 931 u_int regs[4]; 932 uint64_t freq; 933 934 /* Get value from CPUID leaf if possible. */ 935 if (vm_guest == VM_GUEST_NO) 936 return (false); 937 if (hv_high < 0x40000010) 938 return (false); 939 do_cpuid(0x40000010, regs); 940 freq = (uint64_t)(regs[1]) * 1000; 941 942 /* Pick timer divisor. */ 943 lapic_timer_divisor = 2; 944 do { 945 if (freq / lapic_timer_divisor < APIC_TIMER_MAX_COUNT) 946 break; 947 lapic_timer_divisor <<= 1; 948 } while (lapic_timer_divisor <= 128); 949 if (lapic_timer_divisor > 128) 950 return (false); 951 952 /* Record divided frequency. */ 953 count_freq = freq / lapic_timer_divisor; 954 return (count_freq != 0); 955 } 956 957 static uint64_t 958 cb_lapic_getcount(void) 959 { 960 961 return (APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER)); 962 } 963 964 static void 965 lapic_calibrate_initcount(struct lapic *la) 966 { 967 uint64_t freq; 968 969 if (lapic_calibrate_initcount_cpuid_vm()) 970 goto done; 971 972 /* Calibrate the APIC timer frequency. */ 973 lapic_timer_set_divisor(2); 974 lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); 975 fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX); 976 freq = clockcalib(cb_lapic_getcount, "lapic"); 977 fpu_kern_leave(curthread, NULL); 978 979 /* Pick a different divisor if necessary. 
*/ 980 lapic_timer_divisor = 2; 981 do { 982 if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT) 983 break; 984 lapic_timer_divisor <<= 1; 985 } while (lapic_timer_divisor <= 128); 986 if (lapic_timer_divisor > 128) 987 panic("lapic: Divisor too big"); 988 count_freq = freq * 2 / lapic_timer_divisor; 989 done: 990 if (bootverbose) { 991 printf("lapic: Divisor %lu, Frequency %lu Hz\n", 992 lapic_timer_divisor, count_freq); 993 } 994 } 995 996 static void 997 lapic_change_mode(struct eventtimer *et, struct lapic *la, 998 enum lat_timer_mode newmode) 999 { 1000 if (la->la_timer_mode == newmode) 1001 return; 1002 switch (newmode) { 1003 case LAT_MODE_PERIODIC: 1004 lapic_timer_set_divisor(lapic_timer_divisor); 1005 et->et_frequency = count_freq; 1006 break; 1007 case LAT_MODE_DEADLINE: 1008 et->et_frequency = tsc_freq; 1009 break; 1010 case LAT_MODE_ONESHOT: 1011 lapic_timer_set_divisor(lapic_timer_divisor); 1012 et->et_frequency = count_freq; 1013 break; 1014 default: 1015 panic("lapic_change_mode %d", newmode); 1016 } 1017 la->la_timer_mode = newmode; 1018 et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; 1019 et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; 1020 } 1021 1022 static int 1023 lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) 1024 { 1025 struct lapic *la; 1026 1027 la = &lapics[PCPU_GET(apic_id)]; 1028 if (period != 0) { 1029 lapic_change_mode(et, la, LAT_MODE_PERIODIC); 1030 la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 1031 32; 1032 lapic_timer_periodic(la); 1033 } else if (lapic_timer_tsc_deadline) { 1034 lapic_change_mode(et, la, LAT_MODE_DEADLINE); 1035 la->la_timer_period = (et->et_frequency * first) >> 32; 1036 lapic_timer_deadline(la); 1037 } else { 1038 lapic_change_mode(et, la, LAT_MODE_ONESHOT); 1039 la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 1040 32; 1041 lapic_timer_oneshot(la); 1042 } 1043 return (0); 1044 } 1045 1046 static int 1047 
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	/* Mask this CPU's LAPIC timer and forget the programmed mode. */
	/* NOTE(review): the function header line ("static int") starts
	 * above this view; body kept verbatim. */
	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

/* Software-disable the local APIC by clearing the SVR enable bit. */
void
lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

/*
 * Software-enable the local APIC: program the spurious-interrupt vector,
 * set the focus/enable bits, and optionally turn on EOI broadcast
 * suppression when supported and not disabled by the user.
 */
static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/* Return the local APIC ID of the current CPU. */
int
lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	/* In xAPIC mode the ID lives in the high bits of the register. */
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Return non-zero if 'vector' is pending in this CPU's IRR. */
int
lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
	 */
	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
	return (irr & 1 << (vector % 32));
}

/* Record the logical-mode cluster/ID pair for the given local APIC. */
void
lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}

/*
 * Set the mask bit of LVT pin 'pin' on APIC 'apic_id', or on the global
 * defaults when apic_id is APIC_ID_ALL.  Returns 0 on success or EINVAL
 * for an out-of-range pin.
 */
int
lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_masked = masked;
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
	return (0);
}

/*
 * Set the delivery mode of LVT pin 'pin'.  Only NMI/SMI/INIT/ExtINT are
 * supported; these force edge trigger and active-high polarity, and
 * ExtINT additionally starts out masked.  Any other mode panics.
 */
int
lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}

/* Set the polarity of LVT pin 'pin'; CONFORM is not a valid setting. */
int
lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
{

	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		lapics[apic_id].la_lvts[pin].lvt_activehi =
		    (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u polarity: %s\n", pin,
		    pol == INTR_POLARITY_HIGH ? "high" : "low");
	return (0);
}

/* Set the trigger mode of LVT pin 'pin'; CONFORM is not a valid setting. */
int
lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
    enum intr_trigger trigger)
{

	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
		    (trigger == INTR_TRIGGER_EDGE);
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u trigger: %s\n", pin,
		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
	return (0);
}

/*
 * Adjust the TPR of the current CPU so that it blocks all interrupts below
 * the passed in vector.
1267 */ 1268 static void 1269 lapic_set_tpr(u_int vector) 1270 { 1271 #ifdef CHEAP_TPR 1272 lapic_write32(LAPIC_TPR, vector); 1273 #else 1274 uint32_t tpr; 1275 1276 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1277 tpr |= vector; 1278 lapic_write32(LAPIC_TPR, tpr); 1279 #endif 1280 } 1281 1282 void 1283 lapic_eoi(void) 1284 { 1285 1286 lapic_write32_nofence(LAPIC_EOI, 0); 1287 } 1288 1289 void 1290 lapic_handle_intr(int vector, struct trapframe *frame) 1291 { 1292 struct intsrc *isrc; 1293 1294 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1295 kmsan_mark(&vector, sizeof(vector), KMSAN_STATE_INITED); 1296 kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); 1297 trap_check_kstack(); 1298 1299 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1300 vector)); 1301 intr_execute_handlers(isrc, frame); 1302 } 1303 1304 void 1305 lapic_handle_timer(struct trapframe *frame) 1306 { 1307 struct lapic *la; 1308 struct trapframe *oldframe; 1309 struct thread *td; 1310 1311 /* Send EOI first thing. */ 1312 lapic_eoi(); 1313 1314 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1315 kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); 1316 trap_check_kstack(); 1317 1318 #if defined(SMP) && !defined(SCHED_ULE) 1319 /* 1320 * Don't do any accounting for the disabled HTT cores, since it 1321 * will provide misleading numbers for the userland. 1322 * 1323 * No locking is necessary here, since even if we lose the race 1324 * when hlt_cpus_mask changes it is not a big deal, really. 1325 * 1326 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask 1327 * and unlike other schedulers it actually schedules threads to 1328 * those CPUs. 1329 */ 1330 if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) 1331 return; 1332 #endif 1333 1334 /* Look up our local APIC structure for the tick counters. 
*/ 1335 la = &lapics[PCPU_GET(apic_id)]; 1336 (*la->la_timer_count)++; 1337 critical_enter(); 1338 if (lapic_et.et_active) { 1339 td = curthread; 1340 td->td_intr_nesting_level++; 1341 oldframe = td->td_intr_frame; 1342 td->td_intr_frame = frame; 1343 lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); 1344 td->td_intr_frame = oldframe; 1345 td->td_intr_nesting_level--; 1346 } 1347 critical_exit(); 1348 } 1349 1350 static void 1351 lapic_timer_set_divisor(u_int divisor) 1352 { 1353 1354 KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); 1355 KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), 1356 ("lapic: invalid divisor %u", divisor)); 1357 lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); 1358 } 1359 1360 static void 1361 lapic_timer_oneshot(struct lapic *la) 1362 { 1363 uint32_t value; 1364 1365 value = la->lvt_timer_base; 1366 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1367 value |= APIC_LVTT_TM_ONE_SHOT; 1368 la->lvt_timer_last = value; 1369 lapic_write32(LAPIC_LVT_TIMER, value); 1370 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1371 } 1372 1373 static void 1374 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) 1375 { 1376 uint32_t value; 1377 1378 value = la->lvt_timer_base; 1379 value &= ~APIC_LVTT_TM; 1380 value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; 1381 la->lvt_timer_last = value; 1382 lapic_write32(LAPIC_LVT_TIMER, value); 1383 lapic_write32(LAPIC_ICR_TIMER, count); 1384 } 1385 1386 static void 1387 lapic_timer_periodic(struct lapic *la) 1388 { 1389 uint32_t value; 1390 1391 value = la->lvt_timer_base; 1392 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1393 value |= APIC_LVTT_TM_PERIODIC; 1394 la->lvt_timer_last = value; 1395 lapic_write32(LAPIC_LVT_TIMER, value); 1396 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1397 } 1398 1399 static void 1400 lapic_timer_deadline(struct lapic *la) 1401 { 1402 uint32_t value; 1403 1404 value = la->lvt_timer_base; 1405 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1406 
value |= APIC_LVTT_TM_TSCDLT; 1407 if (value != la->lvt_timer_last) { 1408 la->lvt_timer_last = value; 1409 lapic_write32_nofence(LAPIC_LVT_TIMER, value); 1410 if (!x2apic_mode) 1411 mfence(); 1412 } 1413 wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); 1414 } 1415 1416 static void 1417 lapic_timer_stop(struct lapic *la) 1418 { 1419 uint32_t value; 1420 1421 if (la->la_timer_mode == LAT_MODE_DEADLINE) { 1422 wrmsr(MSR_TSC_DEADLINE, 0); 1423 mfence(); 1424 } else { 1425 value = la->lvt_timer_base; 1426 value &= ~APIC_LVTT_TM; 1427 value |= APIC_LVT_M; 1428 la->lvt_timer_last = value; 1429 lapic_write32(LAPIC_LVT_TIMER, value); 1430 } 1431 } 1432 1433 void 1434 lapic_handle_cmc(void) 1435 { 1436 trap_check_kstack(); 1437 1438 lapic_eoi(); 1439 cmc_intr(); 1440 } 1441 1442 /* 1443 * Called from the mca_init() to activate the CMC interrupt if this CPU is 1444 * responsible for monitoring any MC banks for CMC events. Since mca_init() 1445 * is called prior to lapic_setup() during boot, this just needs to unmask 1446 * this CPU's LVT_CMCI entry. 
1447 */ 1448 void 1449 lapic_enable_cmc(void) 1450 { 1451 u_int apic_id; 1452 1453 #ifdef DEV_ATPIC 1454 if (!x2apic_mode && lapic_map == NULL) 1455 return; 1456 #endif 1457 apic_id = PCPU_GET(apic_id); 1458 KASSERT(lapics[apic_id].la_present, 1459 ("%s: missing APIC %u", __func__, apic_id)); 1460 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; 1461 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; 1462 } 1463 1464 int 1465 lapic_enable_mca_elvt(void) 1466 { 1467 u_int apic_id; 1468 uint32_t value; 1469 int elvt_count; 1470 1471 #ifdef DEV_ATPIC 1472 if (lapic_map == NULL) 1473 return (-1); 1474 #endif 1475 1476 apic_id = PCPU_GET(apic_id); 1477 KASSERT(lapics[apic_id].la_present, 1478 ("%s: missing APIC %u", __func__, apic_id)); 1479 elvt_count = amd_read_elvt_count(); 1480 if (elvt_count <= APIC_ELVT_MCA) 1481 return (-1); 1482 1483 value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); 1484 if ((value & APIC_LVT_M) == 0) { 1485 if (bootverbose) 1486 printf("AMD MCE Thresholding Extended LVT is already active\n"); 1487 return (APIC_ELVT_MCA); 1488 } 1489 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; 1490 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; 1491 return (APIC_ELVT_MCA); 1492 } 1493 1494 void 1495 lapic_handle_error(void) 1496 { 1497 uint32_t esr; 1498 1499 trap_check_kstack(); 1500 1501 /* 1502 * Read the contents of the error status register. Write to 1503 * the register first before reading from it to force the APIC 1504 * to update its value to indicate any errors that have 1505 * occurred since the previous write to the register. 1506 */ 1507 lapic_write32(LAPIC_ESR, 0); 1508 esr = lapic_read32(LAPIC_ESR); 1509 1510 printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); 1511 lapic_eoi(); 1512 } 1513 1514 u_int 1515 apic_cpuid(u_int apic_id) 1516 { 1517 #ifdef SMP 1518 return apic_cpuids[apic_id]; 1519 #else 1520 return 0; 1521 #endif 1522 } 1523 1524 /* Request a free IDT vector to be used by the specified IRQ. 
*/ 1525 u_int 1526 apic_alloc_vector(u_int apic_id, u_int irq) 1527 { 1528 u_int vector; 1529 1530 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1531 1532 /* 1533 * Search for a free vector. Currently we just use a very simple 1534 * algorithm to find the first free vector. 1535 */ 1536 mtx_lock_spin(&icu_lock); 1537 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1538 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) 1539 continue; 1540 lapics[apic_id].la_ioint_irqs[vector] = irq; 1541 mtx_unlock_spin(&icu_lock); 1542 return (vector + APIC_IO_INTS); 1543 } 1544 mtx_unlock_spin(&icu_lock); 1545 return (0); 1546 } 1547 1548 /* 1549 * Request 'count' free contiguous IDT vectors to be used by 'count' 1550 * IRQs. 'count' must be a power of two and the vectors will be 1551 * aligned on a boundary of 'align'. If the request cannot be 1552 * satisfied, 0 is returned. 1553 */ 1554 u_int 1555 apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) 1556 { 1557 u_int first, run, vector; 1558 1559 KASSERT(powerof2(count), ("bad count")); 1560 KASSERT(powerof2(align), ("bad align")); 1561 KASSERT(align >= count, ("align < count")); 1562 #ifdef INVARIANTS 1563 for (run = 0; run < count; run++) 1564 KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", 1565 irqs[run], run)); 1566 #endif 1567 1568 /* 1569 * Search for 'count' free vectors. As with apic_alloc_vector(), 1570 * this just uses a simple first fit algorithm. 1571 */ 1572 run = 0; 1573 first = 0; 1574 mtx_lock_spin(&icu_lock); 1575 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1576 /* Vector is in use, end run. */ 1577 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { 1578 run = 0; 1579 first = 0; 1580 continue; 1581 } 1582 1583 /* Start a new run if run == 0 and vector is aligned. */ 1584 if (run == 0) { 1585 if ((vector & (align - 1)) != 0) 1586 continue; 1587 first = vector; 1588 } 1589 run++; 1590 1591 /* Keep looping if the run isn't long enough yet. 
*/ 1592 if (run < count) 1593 continue; 1594 1595 /* Found a run, assign IRQs and return the first vector. */ 1596 for (vector = 0; vector < count; vector++) 1597 lapics[apic_id].la_ioint_irqs[first + vector] = 1598 irqs[vector]; 1599 mtx_unlock_spin(&icu_lock); 1600 return (first + APIC_IO_INTS); 1601 } 1602 mtx_unlock_spin(&icu_lock); 1603 printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); 1604 return (0); 1605 } 1606 1607 /* 1608 * Enable a vector for a particular apic_id. Since all lapics share idt 1609 * entries and ioint_handlers this enables the vector on all lapics. lapics 1610 * which do not have the vector configured would report spurious interrupts 1611 * should it fire. 1612 */ 1613 void 1614 apic_enable_vector(u_int apic_id, u_int vector) 1615 { 1616 1617 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1618 KASSERT(ioint_handlers[vector / 32] != NULL, 1619 ("No ISR handler for vector %u", vector)); 1620 #ifdef KDTRACE_HOOKS 1621 KASSERT(vector != IDT_DTRACE_RET, 1622 ("Attempt to overwrite DTrace entry")); 1623 #endif 1624 setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32], 1625 SDT_APIC, SEL_KPL, GSEL_APIC); 1626 } 1627 1628 void 1629 apic_disable_vector(u_int apic_id, u_int vector) 1630 { 1631 1632 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1633 #ifdef KDTRACE_HOOKS 1634 KASSERT(vector != IDT_DTRACE_RET, 1635 ("Attempt to overwrite DTrace entry")); 1636 #endif 1637 KASSERT(ioint_handlers[vector / 32] != NULL, 1638 ("No ISR handler for vector %u", vector)); 1639 #ifdef notyet 1640 /* 1641 * We can not currently clear the idt entry because other cpus 1642 * may have a valid vector at this offset. 1643 */ 1644 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 1645 SEL_KPL, GSEL_APIC); 1646 #endif 1647 } 1648 1649 /* Release an APIC vector when it's no longer in use. 
 */
void
apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	/* Negative entries are sentinels (free/reserved); report as IRQ 0. */
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 */
DB_SHOW_COMMAND_FLAGS(apic, db_show_apic, DB_CMD_MEMSAFE)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "vv" dumps full interrupt events, "v" a summary, none just IRQs. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}

/* Print the set bits of a 32-bit mask as vector numbers offset by 'base'. */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		if (v & (1 << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}

/* Show info from the lapic regs for this CPU.
 */
DB_SHOW_COMMAND_FLAGS(lapic, db_show_lapic, DB_CMD_MEMSAFE)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR));

/* Helper: dump one 32-vector window of the named register bank. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
1830 */ 1831 1832 static SLIST_HEAD(, apic_enumerator) enumerators = 1833 SLIST_HEAD_INITIALIZER(enumerators); 1834 static struct apic_enumerator *best_enum; 1835 1836 void 1837 apic_register_enumerator(struct apic_enumerator *enumerator) 1838 { 1839 #ifdef INVARIANTS 1840 struct apic_enumerator *apic_enum; 1841 1842 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1843 if (apic_enum == enumerator) 1844 panic("%s: Duplicate register of %s", __func__, 1845 enumerator->apic_name); 1846 } 1847 #endif 1848 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1849 } 1850 1851 /* 1852 * We have to look for CPU's very, very early because certain subsystems 1853 * want to know how many CPU's we have extremely early on in the boot 1854 * process. 1855 */ 1856 static void 1857 apic_init(void *dummy __unused) 1858 { 1859 struct apic_enumerator *enumerator; 1860 int retval, best; 1861 1862 /* We only support built in local APICs. */ 1863 if (!(cpu_feature & CPUID_APIC)) 1864 return; 1865 1866 /* Don't probe if APIC mode is disabled. */ 1867 if (resource_disabled("apic", 0)) 1868 return; 1869 1870 /* Probe all the enumerators to find the best match. */ 1871 best_enum = NULL; 1872 best = 0; 1873 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1874 retval = enumerator->apic_probe(); 1875 if (retval > 0) 1876 continue; 1877 if (best_enum == NULL || best < retval) { 1878 best_enum = enumerator; 1879 best = retval; 1880 } 1881 } 1882 if (best_enum == NULL) { 1883 if (bootverbose) 1884 printf("APIC: Could not find any APICs.\n"); 1885 #ifndef DEV_ATPIC 1886 panic("running without device atpic requires a local APIC"); 1887 #endif 1888 return; 1889 } 1890 1891 if (bootverbose) 1892 printf("APIC: Using the %s enumerator.\n", 1893 best_enum->apic_name); 1894 1895 #ifdef I686_CPU 1896 /* 1897 * To work around an errata, we disable the local APIC on some 1898 * CPUs during early startup. We need to turn the local APIC back 1899 * on on such CPUs now. 
1900 */ 1901 ppro_reenable_apic(); 1902 #endif 1903 1904 /* Probe the CPU's in the system. */ 1905 retval = best_enum->apic_probe_cpus(); 1906 if (retval != 0) 1907 printf("%s: Failed to probe CPUs: returned %d\n", 1908 best_enum->apic_name, retval); 1909 1910 } 1911 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1912 1913 /* 1914 * Setup the local APIC. We have to do this prior to starting up the APs 1915 * in the SMP case. 1916 */ 1917 static void 1918 apic_setup_local(void *dummy __unused) 1919 { 1920 int retval; 1921 1922 if (best_enum == NULL) 1923 return; 1924 1925 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1926 M_WAITOK | M_ZERO); 1927 1928 /* Initialize the local APIC. */ 1929 retval = best_enum->apic_setup_local(); 1930 if (retval != 0) 1931 printf("%s: Failed to setup the local APIC: returned %d\n", 1932 best_enum->apic_name, retval); 1933 } 1934 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1935 1936 /* 1937 * Setup the I/O APICs. 1938 */ 1939 static void 1940 apic_setup_io(void *dummy __unused) 1941 { 1942 int retval; 1943 1944 if (best_enum == NULL) 1945 return; 1946 1947 /* 1948 * Local APIC must be registered before other PICs and pseudo PICs 1949 * for proper suspend/resume order. 1950 */ 1951 intr_register_pic(&lapic_pic); 1952 1953 retval = best_enum->apic_setup_io(); 1954 if (retval != 0) 1955 printf("%s: Failed to setup I/O APICs: returned %d\n", 1956 best_enum->apic_name, retval); 1957 1958 /* 1959 * Finish setting up the local APIC on the BSP once we know 1960 * how to properly program the LINT pins. In particular, this 1961 * enables the EOI suppression mode, if LAPIC supports it and 1962 * user did not disable the mode. 1963 */ 1964 lapic_setup(1); 1965 if (bootverbose) 1966 lapic_dump("BSP"); 1967 1968 /* Enable the MSI "pic". 
*/ 1969 msi_init(); 1970 1971 #ifdef XENHVM 1972 xen_intr_alloc_irqs(); 1973 #endif 1974 } 1975 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1976 1977 #ifdef SMP 1978 /* 1979 * Inter Processor Interrupt functions. The lapic_ipi_*() functions are 1980 * private to the MD code. The public interface for the rest of the 1981 * kernel is defined in mp_machdep.c. 1982 */ 1983 1984 /* 1985 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1986 * wait forever. 1987 */ 1988 int 1989 lapic_ipi_wait(int delay) 1990 { 1991 uint64_t rx; 1992 1993 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1994 if (x2apic_mode) 1995 return (1); 1996 1997 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1998 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1999 APIC_DELSTAT_IDLE) 2000 return (1); 2001 ia32_pause(); 2002 } 2003 return (0); 2004 } 2005 2006 void 2007 lapic_ipi_raw(register_t icrlo, u_int dest) 2008 { 2009 uint32_t icrhi; 2010 2011 /* XXX: Need more sanity checking of icrlo? 
 */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		/* Explicit destination: the APIC ID goes into ICR_HI. */
		if (x2apic_mode)
			icrhi = dest;
		else
			icrhi = dest << APIC_ID_SHIFT;
		lapic_write_icr(icrhi, icrlo);
	} else {
		/* Shorthand destinations only need the low half. */
		lapic_write_icr_lo(icrlo);
	}
}

#ifdef DETECT_DEADLOCK
/* Max lapic_ipi_wait() iterations before complaining about delivery. */
#define AFTER_SPIN 50
#endif

/* Send IPI 'vector' to the destination encoded in 'dest'. */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		if (x2apic_mode && vector < IPI_NMI_FIRST) {
			/* x2APIC provides a dedicated self-IPI MSR. */
			lapic_write_self_ipi(vector);
			return;
		}
		icrlo = APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo = APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo = APIC_DEST_ALLESELF;
		break;
	default:
		icrlo = 0;
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until mesage is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}

void (*ipi_vectored)(u_int, int) = &native_lapic_ipi_vectored;
#endif /* SMP */

/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region.  In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written.  Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */
int
lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	vector = -1;
	mtx_lock_spin(&icu_lock);
	/* Find a dynamic IPI slot whose IDT entry is still the reserved stub. */
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		ip = &idt[idx];
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
		func -= setidt_disp;
#endif
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	/* Returns -1 when no dynamic IPI vector was free. */
	return (vector);
}

/* Return a dynamically-allocated IPI vector to the reserved state. */
void
lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func __diagused;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
	func -= setidt_disp;
#endif
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}