1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <x86/init.h>

#ifdef DDB
#include <sys/interrupt.h>
#include <ddb/ddb.h>
#endif

/* IDT gate descriptor type and code selector differ between amd64 and i386. */
#ifdef __amd64__
#define	SDT_APIC	SDT_SYSIGT
#define	GSEL_APIC	0
#else
#define	SDT_APIC	SDT_SYS386IGT
#define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
#endif

static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);

/*
 * I/O interrupts use non-negative IRQ values.  These values are used
 * to mark unused IDT entries or IDT entries reserved for a non-I/O
 * interrupt.
 */
#define	IRQ_FREE	-1
#define	IRQ_TIMER	-2
#define	IRQ_SYSCALL	-3
#define	IRQ_DTRACE_RET	-4
#define	IRQ_EVTCHN	-5

/* Operating mode of the per-CPU local APIC timer. */
enum lat_timer_mode {
	LAT_MODE_UNDEF = 0,
	LAT_MODE_PERIODIC = 1,
	LAT_MODE_ONESHOT = 2,
	LAT_MODE_DEADLINE = 3,
};

/*
 * Support for local APICs.  Local APICs manage interrupts on each
 * individual processor as opposed to I/O APICs which receive interrupts
 * from I/O devices and then forward them on to the local APICs.
 *
 * Local APICs can also send interrupts to each other thus providing the
 * mechanism for IPIs.
 */

/* Software view of one local vector table (LVT) entry's configuration. */
struct lvt {
	u_int lvt_edgetrigger:1;
	u_int lvt_activehi:1;
	u_int lvt_masked:1;
	u_int lvt_active:1;	/* Set when this entry overrides the default. */
	u_int lvt_mode:16;	/* APIC_LVT_DM_* delivery mode. */
	u_int lvt_vector:8;
};

/* Per-CPU local APIC state; the lapics array is indexed by APIC ID. */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];
	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* AMD extended LVTs. */
	u_int la_id:8;
	u_int la_cluster:4;
	u_int la_cluster_id:2;
	u_int la_present:1;
	u_long *la_timer_count;
	uint64_t la_timer_period;
	enum lat_timer_mode la_timer_mode;
	uint32_t lvt_timer_base;
	uint32_t lvt_timer_last;
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;

/* Global defaults for local APIC LVT entries. */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};

/* Global defaults for AMD local APIC ELVT entries. */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/* I/O interrupt entry points, one handler per block of 32 vectors. */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};

/* PTI (page table isolation) variants of the same entry points. */
static inthand_t *ioint_pti_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
};

/* Divide Configuration Register encodings, indexed by log2(divisor) - 1. */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;	/* MMIO window; NULL in x2APIC mode. */
vm_paddr_t lapic_paddr;
int x2apic_mode;
int lapic_eoi_suppression;
static int lapic_timer_tsc_deadline;
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
static uint64_t lapic_ipi_wait_mult;
static int __read_mostly lapic_ds_idle_timeout = 1000000;
#endif
unsigned int max_apic_id;

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");
#ifdef SMP
SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN,
    &lapic_ds_idle_timeout, 0,
    "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)");
#endif

static void lapic_calibrate_initcount(struct lapic *la);
static void lapic_calibrate_deadline(struct lapic *la);

/*
 * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan
 * instrumentation.  Otherwise, if x2APIC is not available, use of the global
 * lapic_map will generate a KCSan false positive.  While the mapping is
 * shared among all CPUs, the physical access will always take place on the
 * local CPU's APIC, so there isn't in fact a race here.  Furthermore, the
 * KCSan warning printf can cause a panic if issued during LAPIC access,
 * due to attempted recursive use of event timer resources.
 */

/*
 * Read a 32-bit LAPIC register: via MSR in x2APIC mode, via the MMIO
 * window otherwise.
 */
static uint32_t __nosanitizethread
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/*
 * Write a 32-bit LAPIC register.  In x2APIC mode the write is preceded
 * by serializing fences.
 */
static void __nosanitizethread
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

/* As lapic_write32(), but without the fences in x2APIC mode. */
static void __nosanitizethread
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write both halves of the Interrupt Command Register.  In xAPIC mode
 * the two 32-bit writes must not be torn by an interrupt, so they are
 * issued with interrupts disabled; in x2APIC mode the ICR is a single
 * 64-bit MSR write.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	register_t saveintr;
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		saveintr = intr_disable();
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
		intr_restore(saveintr);
	}
}

static void
lapic_write_icr_lo(uint32_t vlo)
{

	if (x2apic_mode) {
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
	} else {
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}

/* Send a self-IPI via the x2APIC-only SELF IPI register. */
static void
lapic_write_self_ipi(uint32_t vector)
{

	KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
	wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
}
#endif /* SMP */

/* Switch the local APIC into x2APIC mode via the APIC base MSR. */
static void
native_lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* Report whether the local APIC is currently enabled in x2APIC mode. */
static bool
native_lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
	    (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void	lapic_enable(void);
static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
static void	lapic_timer_oneshot(struct lapic *);
static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void	lapic_timer_periodic(struct lapic *);
static void	lapic_timer_deadline(struct lapic *);
static void	lapic_timer_stop(struct lapic *);
static void	lapic_timer_set_divisor(u_int divisor);
static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int	lapic_et_start(struct eventtimer *et,
		    sbintime_t first, sbintime_t period);
static int	lapic_et_stop(struct eventtimer *et);
static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
static void	lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/* Forward declarations for apic_ops */
static void	native_lapic_create(u_int apic_id, int boot_cpu);
static void	native_lapic_init(vm_paddr_t addr);
static void	native_lapic_xapic_mode(void);
static void	native_lapic_setup(int boot);
static void	native_lapic_dump(const char *str);
static void	native_lapic_disable(void);
static void	native_lapic_eoi(void);
static int	native_lapic_id(void);
static int	native_lapic_intr_pending(u_int vector);
static u_int	native_apic_cpuid(u_int apic_id);
static u_int	native_apic_alloc_vector(u_int apic_id, u_int irq);
static u_int	native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
		    u_int count, u_int align);
static void	native_apic_disable_vector(u_int apic_id, u_int vector);
static void	native_apic_enable_vector(u_int apic_id, u_int vector);
static void	native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
static void	native_lapic_set_logical_id(u_int apic_id, u_int cluster,
		    u_int cluster_id);
static int	native_lapic_enable_pmc(void);
static void	native_lapic_disable_pmc(void);
static void	native_lapic_reenable_pmc(void);
static void	native_lapic_enable_cmc(void);
static int	native_lapic_enable_mca_elvt(void);
static int	native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
		    u_char masked);
static int	native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
		    uint32_t mode);
static int	native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
		    enum intr_polarity pol);
static int	native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
		    enum intr_trigger trigger);
#ifdef SMP
static void	native_lapic_ipi_raw(register_t icrlo, u_int dest);
static void	native_lapic_ipi_vectored(u_int vector, int dest);
static int	native_lapic_ipi_wait(int delay);
#endif /* SMP */
static int	native_lapic_ipi_alloc(inthand_t *ipifunc);
static void	native_lapic_ipi_free(int vector);

/* Dispatch table used by the rest of the kernel to reach this driver. */
struct apic_ops apic_ops = {
	.create			= native_lapic_create,
	.init			= native_lapic_init,
	.xapic_mode		= native_lapic_xapic_mode,
	.is_x2apic		= native_lapic_is_x2apic,
	.setup			= native_lapic_setup,
	.dump			= native_lapic_dump,
	.disable		= native_lapic_disable,
	.eoi			= native_lapic_eoi,
	.id			= native_lapic_id,
	.intr_pending		= native_lapic_intr_pending,
	.set_logical_id		= native_lapic_set_logical_id,
	.cpuid			= native_apic_cpuid,
	.alloc_vector		= native_apic_alloc_vector,
	.alloc_vectors		= native_apic_alloc_vectors,
	.enable_vector		= native_apic_enable_vector,
	.disable_vector		= native_apic_disable_vector,
	.free_vector		= native_apic_free_vector,
	.enable_pmc		= native_lapic_enable_pmc,
	.disable_pmc		= native_lapic_disable_pmc,
	.reenable_pmc		= native_lapic_reenable_pmc,
	.enable_cmc		= native_lapic_enable_cmc,
	.enable_mca_elvt	= native_lapic_enable_mca_elvt,
#ifdef SMP
	.ipi_raw		= native_lapic_ipi_raw,
	.ipi_vectored		= native_lapic_ipi_vectored,
	.ipi_wait		= native_lapic_ipi_wait,
#endif
	.ipi_alloc		= native_lapic_ipi_alloc,
	.ipi_free		= native_lapic_ipi_free,
	.set_lvt_mask		= native_lapic_set_lvt_mask,
	.set_lvt_mode		= native_lapic_set_lvt_mode,
	.set_lvt_polarity	= native_lapic_set_lvt_polarity,
	.set_lvt_triggermode	= native_lapic_set_lvt_triggermode,
};

/*
 * Merge the software LVT configuration in 'lvt' into the hardware
 * register value 'value', replacing the mask, trigger mode, polarity,
 * delivery mode and vector fields.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
{

	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		if (!lvt->lvt_edgetrigger && bootverbose) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value &= ~APIC_LVT_TM;
		}
		/* Use a vector of 0. */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}

/*
 * Compute the register value for LVT 'pin', preferring a per-CPU
 * override (la_lvts) over the global default table (lvts).
 */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX,
	    ("%s: pin %u out of range", __func__, pin));
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	return (lvt_mode_impl(la, lvt, pin, value));
}

/* As lvt_mode(), but for the AMD extended LVT entries. */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local APIC and setup necessary interrupt vectors.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	int i;
	bool arat;

	/*
	 * Enable x2APIC mode if possible.  Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Register the LAPIC event timer unless disabled by hints. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		/* Set if APIC timer runs in C3. */
		arat = (cpu_power_eax & CPUTPM1_ARAT);

		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
				    "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}

/*
 * Create a local APIC instance.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	if (apic_id > max_apic_id) {
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
		lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}

/*
 * Return the AMD extended APIC feature register, or 0 when the CPU is
 * not AMD/Hygon or does not advertise the extended register space.
 */
static inline uint32_t
amd_read_ext_features(void)
{
	uint32_t version;

	if (cpu_vendor_id != CPU_VENDOR_AMD &&
	    cpu_vendor_id != CPU_VENDOR_HYGON)
		return (0);
	version = lapic_read32(LAPIC_VERSION);
	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
		return (lapic_read32(LAPIC_EXT_FEATURES));
	else
		return (0);
}

/*
 * Return the number of AMD extended LVT entries, clamped to the size
 * of the la_elvts array.
 */
static inline uint32_t
amd_read_elvt_count(void)
{
	uint32_t extf;
	uint32_t count;

	extf = amd_read_ext_features();
	count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
	count = min(count, APIC_ELVT_MAX + 1);
	return (count);
}
/*
 * Dump contents of local APIC registers
 */
static void
native_lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID), version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf(" AMD ext features: 0x%08x\n", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf(" AMD elvt%d: 0x%08x\n", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
	}
}

/* Re-enter x2APIC mode (e.g. on an AP or after resume) if configured. */
static void
native_lapic_xapic_mode(void)
{
	register_t saveintr;

	saveintr = intr_disable();
	if (x2apic_mode)
		native_lapic_enable_x2apic();
	intr_restore(saveintr);
}

/*
 * Program the calling CPU's local APIC: TPR, spurious vector, all LVT
 * entries, and the timer.  Runs with interrupts disabled.  'boot' is
 * set on the first call for a CPU; timer calibration happens only on
 * the BSP at boot.
 */
static void
native_lapic_setup(int boot)
{
	struct lapic *la;
	uint32_t version;
	uint32_t maxlvt;
	register_t saveintr;
	int elvt_count;
	int i;

	saveintr = intr_disable();

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries. */
	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
	    lapic_read32(LAPIC_LVT_LINT0)));
	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
	    lapic_read32(LAPIC_LVT_LINT1)));

	/*
	 * Program the PMC LVT entry if present.
	 *
	 * NOTE(review): unlike the sibling writes above, this passes the
	 * register index LAPIC_LVT_PCINT as the "value" argument instead
	 * of lapic_read32(LAPIC_LVT_PCINT).  lvt_mode() masks out all the
	 * fields it programs, so the result is still well-defined, but any
	 * other bits of the current register value are not preserved —
	 * confirm this is intentional.
	 */
	if (maxlvt >= APIC_LVT_PMC) {
		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
		    LAPIC_LVT_PCINT));
	}

	/* Program timer LVT. */
	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
	    lapic_read32(LAPIC_LVT_TIMER));
	la->lvt_timer_last = la->lvt_timer_base;
	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

	/* Calibrate the timer parameters using BSP. */
	if (boot && IS_BSP()) {
		lapic_calibrate_initcount(la);
		if (lapic_timer_tsc_deadline)
			lapic_calibrate_deadline(la);
	}

	/* Setup the timer if configured. */
	if (la->la_timer_mode != LAT_MODE_UNDEF) {
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		switch (la->la_timer_mode) {
		case LAT_MODE_PERIODIC:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_periodic(la);
			break;
		case LAT_MODE_ONESHOT:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la);
			break;
		case LAT_MODE_DEADLINE:
			lapic_timer_deadline(la);
			break;
		default:
			panic("corrupted la_timer_mode %p %d", la,
			    la->la_timer_mode);
		}
	}

	/* Program error LVT and clear any existing errors. */
	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
	    lapic_read32(LAPIC_LVT_ERROR)));
	lapic_write32(LAPIC_ESR, 0);

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI) {
		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
		    lapic_read32(LAPIC_LVT_CMCI)));
	}

	/* Program any active AMD extended LVT entries. */
	elvt_count = amd_read_elvt_count();
	for (i = 0; i < elvt_count; i++) {
		if (la->la_elvts[i].lvt_active)
			lapic_write32(LAPIC_EXT_LVT0 + i,
			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
	}

	intr_restore(saveintr);
}

/* Register a per-CPU "cpuN:timer" interrupt counter for each present APIC. */
static void
native_lapic_intrcnt(void *dummy __unused)
{
	struct pcpu *pc;
	struct lapic *la;
	char buf[MAXCOMLEN + 1];

	/* If there are no APICs, skip this function. */
	if (lapics == NULL)
		return;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		la = &lapics[pc->pc_apic_id];
		if (!la->la_present)
			continue;

		snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
		intrcnt_add(buf, &la->la_timer_count);
	}
}
SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
    NULL);

/* Unmask the local PMC LVT entry on the calling CPU. */
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t value;

	value = lapic_read32(LAPIC_LVT_PCINT);
	value &= ~APIC_LVT_M;
	lapic_write32(LAPIC_LVT_PCINT, value);
#endif
}

#ifdef HWPMC_HOOKS
/* Rendezvous callback: reprogram the PMC LVT on the calling CPU. */
static void
lapic_update_pmc(void *dummy)
{
	struct lapic *la;

	la = &lapics[lapic_id()];
	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
	    lapic_read32(LAPIC_LVT_PCINT)));
}
#endif

/*
 * Unmask the PMC LVT on all CPUs.  Returns 1 on success, 0 when the
 * local APIC or its PMC LVT entry is not present.
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}

/* Mask the PMC LVT again on all CPUs (hwpmc unload path). */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}

/*
 * Measure the LAPIC timer frequency (count_freq) by timing a one-shot
 * countdown against DELAY(), doubling the divisor until the counter no
 * longer wraps within one second.
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer. */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

/* TSC-deadline mode needs no calibration; the TSC frequency is used as-is. */
static void
lapic_calibrate_deadline(struct lapic *la __unused)
{

	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch the event timer to a new mode, updating its frequency and
 * period bounds accordingly.  No-op when already in 'newmode'.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * Event timer start method: periodic when 'period' is non-zero,
 * otherwise TSC-deadline if available, else one-shot.  The sbintime
 * argument (32.32 fixed point) is converted to timer ticks by
 * multiplying by the frequency and shifting down 32.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* Event timer stop method: halt the timer and forget the current mode. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/*
 * Return the calling CPU's APIC ID.  In xAPIC mode the ID occupies the
 * high byte of the ID register; in x2APIC mode the full register is
 * the ID.
 */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
1129 */ 1130 irr = lapic_read32(LAPIC_IRR0 + vector / 32); 1131 return (irr & 1 << (vector % 32)); 1132 } 1133 1134 static void 1135 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) 1136 { 1137 struct lapic *la; 1138 1139 KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", 1140 __func__, apic_id)); 1141 KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", 1142 __func__, cluster)); 1143 KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, 1144 ("%s: intra cluster id %u too big", __func__, cluster_id)); 1145 la = &lapics[apic_id]; 1146 la->la_cluster = cluster; 1147 la->la_cluster_id = cluster_id; 1148 } 1149 1150 static int 1151 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) 1152 { 1153 1154 if (pin > APIC_LVT_MAX) 1155 return (EINVAL); 1156 if (apic_id == APIC_ID_ALL) { 1157 lvts[pin].lvt_masked = masked; 1158 if (bootverbose) 1159 printf("lapic:"); 1160 } else { 1161 KASSERT(lapics[apic_id].la_present, 1162 ("%s: missing APIC %u", __func__, apic_id)); 1163 lapics[apic_id].la_lvts[pin].lvt_masked = masked; 1164 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1165 if (bootverbose) 1166 printf("lapic%u:", apic_id); 1167 } 1168 if (bootverbose) 1169 printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); 1170 return (0); 1171 } 1172 1173 static int 1174 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1175 { 1176 struct lvt *lvt; 1177 1178 if (pin > APIC_LVT_MAX) 1179 return (EINVAL); 1180 if (apic_id == APIC_ID_ALL) { 1181 lvt = &lvts[pin]; 1182 if (bootverbose) 1183 printf("lapic:"); 1184 } else { 1185 KASSERT(lapics[apic_id].la_present, 1186 ("%s: missing APIC %u", __func__, apic_id)); 1187 lvt = &lapics[apic_id].la_lvts[pin]; 1188 lvt->lvt_active = 1; 1189 if (bootverbose) 1190 printf("lapic%u:", apic_id); 1191 } 1192 lvt->lvt_mode = mode; 1193 switch (mode) { 1194 case APIC_LVT_DM_NMI: 1195 case APIC_LVT_DM_SMI: 1196 case APIC_LVT_DM_INIT: 1197 case APIC_LVT_DM_EXTINT: 1198 lvt->lvt_edgetrigger = 1; 1199 lvt->lvt_activehi = 1; 1200 if (mode == APIC_LVT_DM_EXTINT) 1201 lvt->lvt_masked = 1; 1202 else 1203 lvt->lvt_masked = 0; 1204 break; 1205 default: 1206 panic("Unsupported delivery mode: 0x%x\n", mode); 1207 } 1208 if (bootverbose) { 1209 printf(" Routing "); 1210 switch (mode) { 1211 case APIC_LVT_DM_NMI: 1212 printf("NMI"); 1213 break; 1214 case APIC_LVT_DM_SMI: 1215 printf("SMI"); 1216 break; 1217 case APIC_LVT_DM_INIT: 1218 printf("INIT"); 1219 break; 1220 case APIC_LVT_DM_EXTINT: 1221 printf("ExtINT"); 1222 break; 1223 } 1224 printf(" -> LINT%u\n", pin); 1225 } 1226 return (0); 1227 } 1228 1229 static int 1230 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1231 { 1232 1233 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1234 return (EINVAL); 1235 if (apic_id == APIC_ID_ALL) { 1236 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1237 if (bootverbose) 1238 printf("lapic:"); 1239 } else { 1240 KASSERT(lapics[apic_id].la_present, 1241 ("%s: missing APIC %u", __func__, apic_id)); 1242 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1243 lapics[apic_id].la_lvts[pin].lvt_activehi = 1244 (pol == INTR_POLARITY_HIGH); 1245 if (bootverbose) 1246 printf("lapic%u:", apic_id); 1247 } 
1248 if (bootverbose) 1249 printf(" LINT%u polarity: %s\n", pin, 1250 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1251 return (0); 1252 } 1253 1254 static int 1255 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1256 enum intr_trigger trigger) 1257 { 1258 1259 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1260 return (EINVAL); 1261 if (apic_id == APIC_ID_ALL) { 1262 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1263 if (bootverbose) 1264 printf("lapic:"); 1265 } else { 1266 KASSERT(lapics[apic_id].la_present, 1267 ("%s: missing APIC %u", __func__, apic_id)); 1268 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1269 (trigger == INTR_TRIGGER_EDGE); 1270 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1271 if (bootverbose) 1272 printf("lapic%u:", apic_id); 1273 } 1274 if (bootverbose) 1275 printf(" LINT%u trigger: %s\n", pin, 1276 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1277 return (0); 1278 } 1279 1280 /* 1281 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1282 * the passed in vector. 
 */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	lapic_write32(LAPIC_TPR, vector);
#else
	uint32_t tpr;

	/* Read-modify-write to preserve the reserved TPR bits. */
	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic_write32(LAPIC_TPR, tpr);
#endif
}

/* Signal end-of-interrupt to the local APIC. */
static void
native_lapic_eoi(void)
{

	lapic_write32_nofence(LAPIC_EOI, 0);
}

/*
 * I/O interrupt entry point: map the IDT vector back to its IRQ and run
 * the registered handlers for that interrupt source.
 */
void
lapic_handle_intr(int vector, struct trapframe *frame)
{
	struct intsrc *isrc;

	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
	kmsan_mark(&vector, sizeof(vector), KMSAN_STATE_INITED);
	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);

	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
	    vector));
	intr_execute_handlers(isrc, frame);
}

/*
 * Local APIC timer interrupt handler: account the tick and invoke the
 * eventtimer callback with this CPU's trapframe installed.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

	kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
	kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}

/*
 * Program the timer's divide configuration register.  The divisor must be
 * a power of two with an entry in lapic_timer_divisors[].
 */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
	    ("lapic: invalid divisor %u", divisor));
	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
}

/* Arm a one-shot countdown of la_timer_period ticks with interrupts on. */
static void
lapic_timer_oneshot(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_ONE_SHOT;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm a one-shot countdown of 'count' ticks with the timer LVT masked so
 * no interrupt fires; used for calibration.
 */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, count);
}

/* Arm the timer in periodic mode with a period of la_timer_period ticks. */
static void
lapic_timer_periodic(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_PERIODIC;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in TSC-deadline mode, la_timer_period TSC ticks from now.
 * The LVT is only rewritten when the mode actually changed, and in xAPIC
 * mode an mfence orders the LVT write before the deadline MSR write.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}

/*
 * Stop the timer.  Deadline mode is disarmed by writing 0 to the deadline
 * MSR; the count-based modes by masking the timer LVT.
 */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}

/* Corrected machine check interrupt handler. */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}

/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
static void
native_lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: CMCI unmasked\n", apic_id);
}

/*
 * Activate the AMD MCE Thresholding extended LVT on this CPU.  Returns the
 * ELVT index (APIC_ELVT_MCA) on success, or -1 when the APIC is not mapped,
 * the CPU exposes too few extended LVTs, or the entry is already unmasked
 * (presumably by the BIOS).
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}

/* Local APIC error interrupt handler: report and clear the ESR. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}

/* Translate an APIC ID to a CPU ID. */
static u_int
native_apic_cpuid(u_int apic_id)
{
#ifdef SMP
	return apic_cpuids[apic_id];
#else
	return 0;
#endif
}

/* Request a free IDT vector to be used by the specified IRQ. */
static u_int
native_apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if ((vector & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/*
		 * Found a run, assign IRQs and return the first vector.
		 * 'vector' is reused here as the index into the run.
		 */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}

/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
static void
native_apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}

/* Disable a vector; currently a no-op (see the 'notyet' comment). */
static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}

/* Release an APIC vector when it's no longer in use. */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	/* Negative values are reserved markers (IRQ_TIMER etc.). */
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
1725 */ 1726 DB_SHOW_COMMAND(apic, db_show_apic) 1727 { 1728 struct intsrc *isrc; 1729 int i, verbose; 1730 u_int apic_id; 1731 u_int irq; 1732 1733 if (strcmp(modif, "vv") == 0) 1734 verbose = 2; 1735 else if (strcmp(modif, "v") == 0) 1736 verbose = 1; 1737 else 1738 verbose = 0; 1739 for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { 1740 if (lapics[apic_id].la_present == 0) 1741 continue; 1742 db_printf("Interrupts bound to lapic %u\n", apic_id); 1743 for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { 1744 irq = lapics[apic_id].la_ioint_irqs[i]; 1745 if (irq == IRQ_FREE || irq == IRQ_SYSCALL) 1746 continue; 1747 #ifdef KDTRACE_HOOKS 1748 if (irq == IRQ_DTRACE_RET) 1749 continue; 1750 #endif 1751 #ifdef XENHVM 1752 if (irq == IRQ_EVTCHN) 1753 continue; 1754 #endif 1755 db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); 1756 if (irq == IRQ_TIMER) 1757 db_printf("lapic timer\n"); 1758 else if (irq < num_io_irqs) { 1759 isrc = intr_lookup_source(irq); 1760 if (isrc == NULL || verbose == 0) 1761 db_printf("IRQ %u\n", irq); 1762 else 1763 db_dump_intr_event(isrc->is_event, 1764 verbose == 2); 1765 } else 1766 db_printf("IRQ %u ???\n", irq); 1767 } 1768 } 1769 } 1770 1771 static void 1772 dump_mask(const char *prefix, uint32_t v, int base) 1773 { 1774 int i, first; 1775 1776 first = 1; 1777 for (i = 0; i < 32; i++) 1778 if (v & (1 << i)) { 1779 if (first) { 1780 db_printf("%s:", prefix); 1781 first = 0; 1782 } 1783 db_printf(" %02x", base + i); 1784 } 1785 if (!first) 1786 db_printf("\n"); 1787 } 1788 1789 /* Show info from the lapic regs for this CPU. 
*/ 1790 DB_SHOW_COMMAND(lapic, db_show_lapic) 1791 { 1792 uint32_t v; 1793 1794 db_printf("lapic ID = %d\n", lapic_id()); 1795 v = lapic_read32(LAPIC_VERSION); 1796 db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, 1797 v & 0xf); 1798 db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); 1799 v = lapic_read32(LAPIC_SVR); 1800 db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, 1801 v & APIC_SVR_ENABLE ? "enabled" : "disabled"); 1802 db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); 1803 1804 #define dump_field(prefix, regn, index) \ 1805 dump_mask(__XSTRING(prefix ## index), \ 1806 lapic_read32(LAPIC_ ## regn ## index), \ 1807 index * 32) 1808 1809 db_printf("In-service Interrupts:\n"); 1810 dump_field(isr, ISR, 0); 1811 dump_field(isr, ISR, 1); 1812 dump_field(isr, ISR, 2); 1813 dump_field(isr, ISR, 3); 1814 dump_field(isr, ISR, 4); 1815 dump_field(isr, ISR, 5); 1816 dump_field(isr, ISR, 6); 1817 dump_field(isr, ISR, 7); 1818 1819 db_printf("TMR Interrupts:\n"); 1820 dump_field(tmr, TMR, 0); 1821 dump_field(tmr, TMR, 1); 1822 dump_field(tmr, TMR, 2); 1823 dump_field(tmr, TMR, 3); 1824 dump_field(tmr, TMR, 4); 1825 dump_field(tmr, TMR, 5); 1826 dump_field(tmr, TMR, 6); 1827 dump_field(tmr, TMR, 7); 1828 1829 db_printf("IRR Interrupts:\n"); 1830 dump_field(irr, IRR, 0); 1831 dump_field(irr, IRR, 1); 1832 dump_field(irr, IRR, 2); 1833 dump_field(irr, IRR, 3); 1834 dump_field(irr, IRR, 4); 1835 dump_field(irr, IRR, 5); 1836 dump_field(irr, IRR, 6); 1837 dump_field(irr, IRR, 7); 1838 1839 #undef dump_field 1840 } 1841 #endif 1842 1843 /* 1844 * APIC probing support code. This includes code to manage enumerators. 
 */

static SLIST_HEAD(, apic_enumerator) enumerators =
	SLIST_HEAD_INITIALIZER(enumerators);
static struct apic_enumerator *best_enum;

/* Register an APIC enumerator for consideration during apic_init(). */
void
apic_register_enumerator(struct apic_enumerator *enumerator)
{
#ifdef INVARIANTS
	struct apic_enumerator *apic_enum;

	SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
		if (apic_enum == enumerator)
			panic("%s: Duplicate register of %s", __func__,
			    enumerator->apic_name);
	}
#endif
	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
}

/*
 * We have to look for CPU's very, very early because certain subsystems
 * want to know how many CPU's we have extremely early on in the boot
 * process.
 */
static void
apic_init(void *dummy __unused)
{
	struct apic_enumerator *enumerator;
	int retval, best;

	/* We only support built in local APICs. */
	if (!(cpu_feature & CPUID_APIC))
		return;

	/* Don't probe if APIC mode is disabled. */
	if (resource_disabled("apic", 0))
		return;

	/*
	 * Probe all the enumerators to find the best match.  apic_probe()
	 * returns a value <= 0 on success; among successful probes the
	 * largest (closest to zero) return value wins.
	 */
	best_enum = NULL;
	best = 0;
	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
		retval = enumerator->apic_probe();
		if (retval > 0)
			continue;
		if (best_enum == NULL || best < retval) {
			best_enum = enumerator;
			best = retval;
		}
	}
	if (best_enum == NULL) {
		if (bootverbose)
			printf("APIC: Could not find any APICs.\n");
#ifndef DEV_ATPIC
		panic("running without device atpic requires a local APIC");
#endif
		return;
	}

	if (bootverbose)
		printf("APIC: Using the %s enumerator.\n",
		    best_enum->apic_name);

#ifdef I686_CPU
	/*
	 * To work around an errata, we disable the local APIC on some
	 * CPUs during early startup.  We need to turn the local APIC back
	 * on on such CPUs now.
	 */
	ppro_reenable_apic();
#endif

	/* Probe the CPU's in the system. */
	retval = best_enum->apic_probe_cpus();
	if (retval != 0)
		printf("%s: Failed to probe CPUs: returned %d\n",
		    best_enum->apic_name, retval);

}
SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);

/*
 * Setup the local APIC.  We have to do this prior to starting up the APs
 * in the SMP case.
 */
static void
apic_setup_local(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/* One struct lapic per possible APIC ID. */
	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
	    M_WAITOK | M_ZERO);

	/* Initialize the local APIC. */
	retval = best_enum->apic_setup_local();
	if (retval != 0)
		printf("%s: Failed to setup the local APIC: returned %d\n",
		    best_enum->apic_name, retval);
}
SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);

/*
 * Setup the I/O APICs.
 */
static void
apic_setup_io(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/*
	 * Local APIC must be registered before other PICs and pseudo PICs
	 * for proper suspend/resume order.
	 */
	intr_register_pic(&lapic_pic);

	retval = best_enum->apic_setup_io();
	if (retval != 0)
		printf("%s: Failed to setup I/O APICs: returned %d\n",
		    best_enum->apic_name, retval);

	/*
	 * Finish setting up the local APIC on the BSP once we know
	 * how to properly program the LINT pins.  In particular, this
	 * enables the EOI suppression mode, if LAPIC supports it and
	 * user did not disable the mode.
	 */
	lapic_setup(1);
	if (bootverbose)
		lapic_dump("BSP");

	/* Enable the MSI "pic". */
	msi_init();

#ifdef XENHVM
	xen_intr_alloc_irqs();
#endif
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);

#ifdef SMP
/*
 * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
 * private to the MD code.  The public interface for the rest of the
 * kernel is defined in mp_machdep.c.
 */

/*
 * Wait delay microseconds for IPI to be sent.  If delay is -1, we
 * wait forever.
 */
static int
native_lapic_ipi_wait(int delay)
{
	uint64_t rx;

	/* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
	if (x2apic_mode)
		return (1);

	for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
		if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
		    APIC_DELSTAT_IDLE)
			return (1);
		ia32_pause();
	}
	return (0);
}

/*
 * Write a raw command to the ICR.  For destination-field addressing the
 * high half (or the MSR's high bits in x2APIC mode) carries the target
 * APIC ID; shorthand destinations only need the low half.
 */
static void
native_lapic_ipi_raw(register_t icrlo, u_int dest)
{
	uint32_t icrhi;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		if (x2apic_mode)
			icrhi = dest;
		else
			icrhi = dest << APIC_ID_SHIFT;
		lapic_write_icr(icrhi, icrlo);
	} else {
		lapic_write_icr_lo(icrlo);
	}
}

#ifdef DETECT_DEADLOCK
#define	AFTER_SPIN	50
#endif

/*
 * Send an IPI with the given vector to 'dest' (an APIC ID or one of the
 * APIC_IPI_DEST_* shorthands).  Vectors >= IPI_NMI_FIRST are pseudo
 * vectors delivered as NMI.
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		/* x2APIC has a dedicated fast path for self-IPIs. */
		if (x2apic_mode && vector < IPI_NMI_FIRST) {
			lapic_write_self_ipi(vector);
			return;
		}
		icrlo = APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo = APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo = APIC_DEST_ALLESELF;
		break;
	default:
		icrlo = 0;
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}

#endif /* SMP */

/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region.  In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written.  Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */
/*
 * Allocate an IPI vector from the dynamic range by installing 'ipifunc'
 * in the first IDT slot still pointing at the reserved stub.  Returns the
 * vector number, or -1 if no dynamic slot is free.
 */
static int
native_lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	vector = -1;
	mtx_lock_spin(&icu_lock);
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		ip = &idt[idx];
		/* Reassemble the handler address from the gate descriptor. */
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
		func -= setidt_disp;
#endif
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	return (vector);
}

/*
 * Return a dynamically allocated IPI vector to the free pool by pointing
 * its IDT slot back at the reserved stub.
 */
static void
native_lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
	func -= setidt_disp;
#endif
	/* Freeing a slot that is already free indicates a double free. */
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}