1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_atpic.h" 40 #include "opt_hwpmc_hooks.h" 41 42 #include "opt_ddb.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/asan.h> 47 #include <sys/bus.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mutex.h> 52 #include <sys/pcpu.h> 53 #include <sys/proc.h> 54 #include <sys/sched.h> 55 #include <sys/smp.h> 56 #include <sys/sysctl.h> 57 #include <sys/timeet.h> 58 59 #include <vm/vm.h> 60 #include <vm/pmap.h> 61 62 #include <x86/apicreg.h> 63 #include <machine/clock.h> 64 #include <machine/cpufunc.h> 65 #include <machine/cputypes.h> 66 #include <machine/frame.h> 67 #include <machine/intr_machdep.h> 68 #include <x86/apicvar.h> 69 #include <x86/mca.h> 70 #include <machine/md_var.h> 71 #include <machine/smp.h> 72 #include <machine/specialreg.h> 73 #include <x86/init.h> 74 75 #ifdef DDB 76 #include <sys/interrupt.h> 77 #include <ddb/ddb.h> 78 #endif 79 80 #ifdef __amd64__ 81 #define SDT_APIC SDT_SYSIGT 82 #define GSEL_APIC 0 83 #else 84 #define SDT_APIC SDT_SYS386IGT 85 #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 86 #endif 87 88 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); 89 90 /* Sanity checks on IDT vectors. */ 91 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 92 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 93 CTASSERT(APIC_LOCAL_INTS == 240); 94 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 95 96 /* 97 * I/O interrupts use non-negative IRQ values. These values are used 98 * to mark unused IDT entries or IDT entries reserved for a non-I/O 99 * interrupt. 100 */ 101 #define IRQ_FREE -1 102 #define IRQ_TIMER -2 103 #define IRQ_SYSCALL -3 104 #define IRQ_DTRACE_RET -4 105 #define IRQ_EVTCHN -5 106 107 enum lat_timer_mode { 108 LAT_MODE_UNDEF = 0, 109 LAT_MODE_PERIODIC = 1, 110 LAT_MODE_ONESHOT = 2, 111 LAT_MODE_DEADLINE = 3, 112 }; 113 114 /* 115 * Support for local APICs. Local APICs manage interrupts on each 116 * individual processor as opposed to I/O APICs which receive interrupts 117 * from I/O devices and then forward them on to the local APICs. 118 * 119 * Local APICs can also send interrupts to each other thus providing the 120 * mechanism for IPIs. 121 */ 122 123 struct lvt { 124 u_int lvt_edgetrigger:1; 125 u_int lvt_activehi:1; 126 u_int lvt_masked:1; 127 u_int lvt_active:1; 128 u_int lvt_mode:16; 129 u_int lvt_vector:8; 130 }; 131 132 struct lapic { 133 struct lvt la_lvts[APIC_LVT_MAX + 1]; 134 struct lvt la_elvts[APIC_ELVT_MAX + 1]; 135 u_int la_id:8; 136 u_int la_cluster:4; 137 u_int la_cluster_id:2; 138 u_int la_present:1; 139 u_long *la_timer_count; 140 uint64_t la_timer_period; 141 enum lat_timer_mode la_timer_mode; 142 uint32_t lvt_timer_base; 143 uint32_t lvt_timer_last; 144 /* Include IDT_SYSCALL to make indexing easier. */ 145 int la_ioint_irqs[APIC_NUM_IOINTS + 1]; 146 } static *lapics; 147 148 /* Global defaults for local APIC LVT entries. */ 149 static struct lvt lvts[APIC_LVT_MAX + 1] = { 150 { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ 151 { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ 152 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ 153 { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ 154 { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ 155 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ 156 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ 157 }; 158 159 /* Global defaults for AMD local APIC ELVT entries. */ 160 static struct lvt elvts[APIC_ELVT_MAX + 1] = { 161 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 162 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, 163 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 164 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 165 }; 166 167 static inthand_t *ioint_handlers[] = { 168 NULL, /* 0 - 31 */ 169 IDTVEC(apic_isr1), /* 32 - 63 */ 170 IDTVEC(apic_isr2), /* 64 - 95 */ 171 IDTVEC(apic_isr3), /* 96 - 127 */ 172 IDTVEC(apic_isr4), /* 128 - 159 */ 173 IDTVEC(apic_isr5), /* 160 - 191 */ 174 IDTVEC(apic_isr6), /* 192 - 223 */ 175 IDTVEC(apic_isr7), /* 224 - 255 */ 176 }; 177 178 static inthand_t *ioint_pti_handlers[] = { 179 NULL, /* 0 - 31 */ 180 IDTVEC(apic_isr1_pti), /* 32 - 63 */ 181 IDTVEC(apic_isr2_pti), /* 64 - 95 */ 182 IDTVEC(apic_isr3_pti), /* 96 - 127 */ 183 IDTVEC(apic_isr4_pti), /* 128 - 159 */ 184 IDTVEC(apic_isr5_pti), /* 160 - 191 */ 185 IDTVEC(apic_isr6_pti), /* 192 - 223 */ 186 IDTVEC(apic_isr7_pti), /* 224 - 255 */ 187 }; 188 189 static u_int32_t lapic_timer_divisors[] = { 190 APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 191 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 192 }; 193 194 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); 195 196 volatile char *lapic_map; 197 vm_paddr_t lapic_paddr; 198 int x2apic_mode; 199 int lapic_eoi_suppression; 200 static int lapic_timer_tsc_deadline; 201 static u_long lapic_timer_divisor, count_freq; 202 static struct eventtimer lapic_et; 203 #ifdef SMP 204 static uint64_t lapic_ipi_wait_mult; 205 static int __read_mostly lapic_ds_idle_timeout = 1000000; 206 #endif 207 unsigned int max_apic_id; 208 209 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 210 "APIC options"); 211 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); 212 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, 213 &lapic_eoi_suppression, 0, ""); 214 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, 215 &lapic_timer_tsc_deadline, 0, ""); 216 #ifdef SMP 217 SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN, 218 &lapic_ds_idle_timeout, 0, 219 "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)"); 220 #endif 221 222 static void lapic_calibrate_initcount(struct lapic *la); 223 static void lapic_calibrate_deadline(struct lapic *la); 224 225 /* 226 * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan 227 * instrumentation. Otherwise, if x2APIC is not available, use of the global 228 * lapic_map will generate a KCSan false positive. While the mapping is 229 * shared among all CPUs, the physical access will always take place on the 230 * local CPU's APIC, so there isn't in fact a race here. Furthermore, the 231 * KCSan warning printf can cause a panic if issued during LAPIC access, 232 * due to attempted recursive use of event timer resources. 233 */ 234 235 static uint32_t __nosanitizethread 236 lapic_read32(enum LAPIC_REGISTERS reg) 237 { 238 uint32_t res; 239 240 if (x2apic_mode) { 241 res = rdmsr32(MSR_APIC_000 + reg); 242 } else { 243 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); 244 } 245 return (res); 246 } 247 248 static void __nosanitizethread 249 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) 250 { 251 252 if (x2apic_mode) { 253 mfence(); 254 lfence(); 255 wrmsr(MSR_APIC_000 + reg, val); 256 } else { 257 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 258 } 259 } 260 261 static void __nosanitizethread 262 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) 263 { 264 265 if (x2apic_mode) { 266 wrmsr(MSR_APIC_000 + reg, val); 267 } else { 268 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 269 } 270 } 271 272 #ifdef SMP 273 static uint64_t 274 lapic_read_icr_lo(void) 275 { 276 277 return (lapic_read32(LAPIC_ICR_LO)); 278 } 279 280 static void 281 lapic_write_icr(uint32_t vhi, uint32_t vlo) 282 { 283 register_t saveintr; 284 uint64_t v; 285 286 if (x2apic_mode) { 287 v = ((uint64_t)vhi << 32) | vlo; 288 mfence(); 289 wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); 290 } else { 291 saveintr = intr_disable(); 292 lapic_write32(LAPIC_ICR_HI, vhi); 293 lapic_write32(LAPIC_ICR_LO, vlo); 294 intr_restore(saveintr); 295 } 296 } 297 298 static void 299 lapic_write_icr_lo(uint32_t vlo) 300 { 301 302 if (x2apic_mode) { 303 mfence(); 304 wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo); 305 } else { 306 lapic_write32(LAPIC_ICR_LO, vlo); 307 } 308 } 309 310 static void 311 lapic_write_self_ipi(uint32_t vector) 312 { 313 314 KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode")); 315 wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector); 316 } 317 #endif /* SMP */ 318 319 static void 320 native_lapic_enable_x2apic(void) 321 { 322 uint64_t apic_base; 323 324 apic_base = rdmsr(MSR_APICBASE); 325 apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; 326 wrmsr(MSR_APICBASE, apic_base); 327 } 328 329 static bool 330 native_lapic_is_x2apic(void) 331 { 332 uint64_t apic_base; 333 334 apic_base = rdmsr(MSR_APICBASE); 335 return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) == 336 (APICBASE_X2APIC | APICBASE_ENABLED)); 337 } 338 339 static void lapic_enable(void); 340 static void lapic_resume(struct pic *pic, bool suspend_cancelled); 341 static void lapic_timer_oneshot(struct lapic *); 342 static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); 343 static void lapic_timer_periodic(struct lapic *); 344 static void lapic_timer_deadline(struct lapic *); 345 static void lapic_timer_stop(struct lapic *); 346 static void lapic_timer_set_divisor(u_int divisor); 347 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); 348 static int lapic_et_start(struct eventtimer *et, 349 sbintime_t first, sbintime_t period); 350 static int lapic_et_stop(struct eventtimer *et); 351 static u_int apic_idt_to_irq(u_int apic_id, u_int vector); 352 static void lapic_set_tpr(u_int vector); 353 354 struct pic lapic_pic = { .pic_resume = lapic_resume }; 355 356 /* Forward declarations for apic_ops */ 357 static void native_lapic_create(u_int apic_id, int boot_cpu); 358 static void native_lapic_init(vm_paddr_t addr); 359 static void native_lapic_xapic_mode(void); 360 static void native_lapic_setup(int boot); 361 static void native_lapic_dump(const char *str); 362 static void native_lapic_disable(void); 363 static void native_lapic_eoi(void); 364 static int native_lapic_id(void); 365 static int native_lapic_intr_pending(u_int vector); 366 static u_int native_apic_cpuid(u_int apic_id); 367 static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); 368 static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, 369 u_int count, u_int align); 370 static void native_apic_disable_vector(u_int apic_id, u_int vector); 371 static void native_apic_enable_vector(u_int apic_id, u_int vector); 372 static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); 373 static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, 374 u_int cluster_id); 375 static int native_lapic_enable_pmc(void); 376 static void native_lapic_disable_pmc(void); 377 static void native_lapic_reenable_pmc(void); 378 static void native_lapic_enable_cmc(void); 379 static int native_lapic_enable_mca_elvt(void); 380 static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, 381 u_char masked); 382 static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, 383 uint32_t mode); 384 static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, 385 enum intr_polarity pol); 386 static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, 387 enum intr_trigger trigger); 388 #ifdef SMP 389 static void native_lapic_ipi_raw(register_t icrlo, u_int dest); 390 static void native_lapic_ipi_vectored(u_int vector, int dest); 391 static int native_lapic_ipi_wait(int delay); 392 #endif /* SMP */ 393 static int native_lapic_ipi_alloc(inthand_t *ipifunc); 394 static void native_lapic_ipi_free(int vector); 395 396 struct apic_ops apic_ops = { 397 .create = native_lapic_create, 398 .init = native_lapic_init, 399 .xapic_mode = native_lapic_xapic_mode, 400 .is_x2apic = native_lapic_is_x2apic, 401 .setup = native_lapic_setup, 402 .dump = native_lapic_dump, 403 .disable = native_lapic_disable, 404 .eoi = native_lapic_eoi, 405 .id = native_lapic_id, 406 .intr_pending = native_lapic_intr_pending, 407 .set_logical_id = native_lapic_set_logical_id, 408 .cpuid = native_apic_cpuid, 409 .alloc_vector = native_apic_alloc_vector, 410 .alloc_vectors = native_apic_alloc_vectors, 411 .enable_vector = native_apic_enable_vector, 412 .disable_vector = native_apic_disable_vector, 413 .free_vector = native_apic_free_vector, 414 .enable_pmc = native_lapic_enable_pmc, 415 .disable_pmc = native_lapic_disable_pmc, 416 .reenable_pmc = native_lapic_reenable_pmc, 417 .enable_cmc = native_lapic_enable_cmc, 418 .enable_mca_elvt = native_lapic_enable_mca_elvt, 419 #ifdef SMP 420 .ipi_raw = native_lapic_ipi_raw, 421 .ipi_vectored = native_lapic_ipi_vectored, 422 .ipi_wait = native_lapic_ipi_wait, 423 #endif 424 .ipi_alloc = native_lapic_ipi_alloc, 425 .ipi_free = native_lapic_ipi_free, 426 .set_lvt_mask = native_lapic_set_lvt_mask, 427 .set_lvt_mode = native_lapic_set_lvt_mode, 428 .set_lvt_polarity = native_lapic_set_lvt_polarity, 429 .set_lvt_triggermode = native_lapic_set_lvt_triggermode, 430 }; 431 432 static uint32_t 433 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) 434 { 435 436 value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | 437 APIC_LVT_VECTOR); 438 if (lvt->lvt_edgetrigger == 0) 439 value |= APIC_LVT_TM; 440 if (lvt->lvt_activehi == 0) 441 value |= APIC_LVT_IIPP_INTALO; 442 if (lvt->lvt_masked) 443 value |= APIC_LVT_M; 444 value |= lvt->lvt_mode; 445 switch (lvt->lvt_mode) { 446 case APIC_LVT_DM_NMI: 447 case APIC_LVT_DM_SMI: 448 case APIC_LVT_DM_INIT: 449 case APIC_LVT_DM_EXTINT: 450 if (!lvt->lvt_edgetrigger && bootverbose) { 451 printf("lapic%u: Forcing LINT%u to edge trigger\n", 452 la->la_id, pin); 453 value &= ~APIC_LVT_TM; 454 } 455 /* Use a vector of 0. */ 456 break; 457 case APIC_LVT_DM_FIXED: 458 value |= lvt->lvt_vector; 459 break; 460 default: 461 panic("bad APIC LVT delivery mode: %#x\n", value); 462 } 463 return (value); 464 } 465 466 static uint32_t 467 lvt_mode(struct lapic *la, u_int pin, uint32_t value) 468 { 469 struct lvt *lvt; 470 471 KASSERT(pin <= APIC_LVT_MAX, 472 ("%s: pin %u out of range", __func__, pin)); 473 if (la->la_lvts[pin].lvt_active) 474 lvt = &la->la_lvts[pin]; 475 else 476 lvt = &lvts[pin]; 477 478 return (lvt_mode_impl(la, lvt, pin, value)); 479 } 480 481 static uint32_t 482 elvt_mode(struct lapic *la, u_int idx, uint32_t value) 483 { 484 struct lvt *elvt; 485 486 KASSERT(idx <= APIC_ELVT_MAX, 487 ("%s: idx %u out of range", __func__, idx)); 488 489 elvt = &la->la_elvts[idx]; 490 KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); 491 KASSERT(elvt->lvt_edgetrigger, 492 ("%s: ELVT%u is not edge triggered", __func__, idx)); 493 KASSERT(elvt->lvt_activehi, 494 ("%s: ELVT%u is not active high", __func__, idx)); 495 return (lvt_mode_impl(la, elvt, idx, value)); 496 } 497 498 /* 499 * Map the local APIC and setup necessary interrupt vectors. 500 */ 501 static void 502 native_lapic_init(vm_paddr_t addr) 503 { 504 #ifdef SMP 505 uint64_t r, r1, r2, rx; 506 #endif 507 uint32_t ver; 508 int i; 509 bool arat; 510 511 /* 512 * Enable x2APIC mode if possible. Map the local APIC 513 * registers page. 514 * 515 * Keep the LAPIC registers page mapped uncached for x2APIC 516 * mode too, to have direct map page attribute set to 517 * uncached. This is needed to work around CPU errata present 518 * on all Intel processors. 519 */ 520 KASSERT(trunc_page(addr) == addr, 521 ("local APIC not aligned on a page boundary")); 522 lapic_paddr = addr; 523 lapic_map = pmap_mapdev(addr, PAGE_SIZE); 524 if (x2apic_mode) { 525 native_lapic_enable_x2apic(); 526 lapic_map = NULL; 527 } 528 529 /* Setup the spurious interrupt handler. */ 530 setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, 531 GSEL_APIC); 532 533 /* Perform basic initialization of the BSP's local APIC. */ 534 lapic_enable(); 535 536 /* Set BSP's per-CPU local APIC ID. */ 537 PCPU_SET(apic_id, lapic_id()); 538 539 /* Local APIC timer interrupt. */ 540 setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), 541 SDT_APIC, SEL_KPL, GSEL_APIC); 542 543 /* Local APIC error interrupt. */ 544 setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), 545 SDT_APIC, SEL_KPL, GSEL_APIC); 546 547 /* XXX: Thermal interrupt */ 548 549 /* Local APIC CMCI. */ 550 setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), 551 SDT_APIC, SEL_KPL, GSEL_APIC); 552 553 if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { 554 /* Set if APIC timer runs in C3. */ 555 arat = (cpu_power_eax & CPUTPM1_ARAT); 556 557 bzero(&lapic_et, sizeof(lapic_et)); 558 lapic_et.et_name = "LAPIC"; 559 lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | 560 ET_FLAGS_PERCPU; 561 lapic_et.et_quality = 600; 562 if (!arat) { 563 lapic_et.et_flags |= ET_FLAGS_C3STOP; 564 lapic_et.et_quality = 100; 565 } 566 if ((cpu_feature & CPUID_TSC) != 0 && 567 (cpu_feature2 & CPUID2_TSCDLT) != 0 && 568 tsc_is_invariant && tsc_freq != 0) { 569 lapic_timer_tsc_deadline = 1; 570 TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", 571 &lapic_timer_tsc_deadline); 572 } 573 574 lapic_et.et_frequency = 0; 575 /* We don't know frequency yet, so trying to guess. */ 576 lapic_et.et_min_period = 0x00001000LL; 577 lapic_et.et_max_period = SBT_1S; 578 lapic_et.et_start = lapic_et_start; 579 lapic_et.et_stop = lapic_et_stop; 580 lapic_et.et_priv = NULL; 581 et_register(&lapic_et); 582 } 583 584 /* 585 * Set lapic_eoi_suppression after lapic_enable(), to not 586 * enable suppression in the hardware prematurely. Note that 587 * we by default enable suppression even when system only has 588 * one IO-APIC, since EOI is broadcasted to all APIC agents, 589 * including CPUs, otherwise. 590 * 591 * It seems that at least some KVM versions report 592 * EOI_SUPPRESSION bit, but auto-EOI does not work. 593 */ 594 ver = lapic_read32(LAPIC_VERSION); 595 if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { 596 lapic_eoi_suppression = 1; 597 if (vm_guest == VM_GUEST_KVM) { 598 if (bootverbose) 599 printf( 600 "KVM -- disabling lapic eoi suppression\n"); 601 lapic_eoi_suppression = 0; 602 } 603 TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", 604 &lapic_eoi_suppression); 605 } 606 607 #ifdef SMP 608 #define LOOPS 100000 609 /* 610 * Calibrate the busy loop waiting for IPI ack in xAPIC mode. 611 * lapic_ipi_wait_mult contains the number of iterations which 612 * approximately delay execution for 1 microsecond (the 613 * argument to native_lapic_ipi_wait() is in microseconds). 614 * 615 * We assume that TSC is present and already measured. 616 * Possible TSC frequency jumps are irrelevant to the 617 * calibration loop below, the CPU clock management code is 618 * not yet started, and we do not enter sleep states. 619 */ 620 KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, 621 ("TSC not initialized")); 622 if (!x2apic_mode) { 623 r = rdtsc(); 624 for (rx = 0; rx < LOOPS; rx++) { 625 (void)lapic_read_icr_lo(); 626 ia32_pause(); 627 } 628 r = rdtsc() - r; 629 r1 = tsc_freq * LOOPS; 630 r2 = r * 1000000; 631 lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1; 632 if (bootverbose) { 633 printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " 634 "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, 635 (uintmax_t)r, (uintmax_t)tsc_freq); 636 } 637 } 638 #undef LOOPS 639 #endif /* SMP */ 640 } 641 642 /* 643 * Create a local APIC instance. 644 */ 645 static void 646 native_lapic_create(u_int apic_id, int boot_cpu) 647 { 648 int i; 649 650 if (apic_id > max_apic_id) { 651 printf("APIC: Ignoring local APIC with ID %d\n", apic_id); 652 if (boot_cpu) 653 panic("Can't ignore BSP"); 654 return; 655 } 656 KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", 657 apic_id)); 658 659 /* 660 * Assume no local LVT overrides and a cluster of 0 and 661 * intra-cluster ID of 0. 662 */ 663 lapics[apic_id].la_present = 1; 664 lapics[apic_id].la_id = apic_id; 665 for (i = 0; i <= APIC_LVT_MAX; i++) { 666 lapics[apic_id].la_lvts[i] = lvts[i]; 667 lapics[apic_id].la_lvts[i].lvt_active = 0; 668 } 669 for (i = 0; i <= APIC_ELVT_MAX; i++) { 670 lapics[apic_id].la_elvts[i] = elvts[i]; 671 lapics[apic_id].la_elvts[i].lvt_active = 0; 672 } 673 for (i = 0; i <= APIC_NUM_IOINTS; i++) 674 lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; 675 lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; 676 lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = 677 IRQ_TIMER; 678 #ifdef KDTRACE_HOOKS 679 lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = 680 IRQ_DTRACE_RET; 681 #endif 682 #ifdef XENHVM 683 lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; 684 #endif 685 686 #ifdef SMP 687 cpu_add(apic_id, boot_cpu); 688 #endif 689 } 690 691 static inline uint32_t 692 amd_read_ext_features(void) 693 { 694 uint32_t version; 695 696 if (cpu_vendor_id != CPU_VENDOR_AMD && 697 cpu_vendor_id != CPU_VENDOR_HYGON) 698 return (0); 699 version = lapic_read32(LAPIC_VERSION); 700 if ((version & APIC_VER_AMD_EXT_SPACE) != 0) 701 return (lapic_read32(LAPIC_EXT_FEATURES)); 702 else 703 return (0); 704 } 705 706 static inline uint32_t 707 amd_read_elvt_count(void) 708 { 709 uint32_t extf; 710 uint32_t count; 711 712 extf = amd_read_ext_features(); 713 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 714 count = min(count, APIC_ELVT_MAX + 1); 715 return (count); 716 } 717 718 /* 719 * Dump contents of local APIC registers 720 */ 721 static void 722 native_lapic_dump(const char* str) 723 { 724 uint32_t version; 725 uint32_t maxlvt; 726 uint32_t extf; 727 int elvt_count; 728 int i; 729 730 version = lapic_read32(LAPIC_VERSION); 731 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 732 printf("cpu%d %s:\n", PCPU_GET(cpuid), str); 733 printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", 734 lapic_read32(LAPIC_ID), version, 735 lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); 736 if ((cpu_feature2 & CPUID2_X2APIC) != 0) 737 printf(" x2APIC: %d", x2apic_mode); 738 printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 739 lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), 740 lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); 741 printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", 742 lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), 743 lapic_read32(LAPIC_LVT_ERROR)); 744 if (maxlvt >= APIC_LVT_PMC) 745 printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); 746 printf("\n"); 747 if (maxlvt >= APIC_LVT_CMCI) 748 printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); 749 extf = amd_read_ext_features(); 750 if (extf != 0) { 751 printf(" AMD ext features: 0x%08x\n", extf); 752 elvt_count = amd_read_elvt_count(); 753 for (i = 0; i < elvt_count; i++) 754 printf(" AMD elvt%d: 0x%08x\n", i, 755 lapic_read32(LAPIC_EXT_LVT0 + i)); 756 } 757 } 758 759 static void 760 native_lapic_xapic_mode(void) 761 { 762 register_t saveintr; 763 764 saveintr = intr_disable(); 765 if (x2apic_mode) 766 native_lapic_enable_x2apic(); 767 intr_restore(saveintr); 768 } 769 770 static void 771 native_lapic_setup(int boot) 772 { 773 struct lapic *la; 774 uint32_t version; 775 uint32_t maxlvt; 776 register_t saveintr; 777 int elvt_count; 778 int i; 779 780 saveintr = intr_disable(); 781 782 la = &lapics[lapic_id()]; 783 KASSERT(la->la_present, ("missing APIC structure")); 784 version = lapic_read32(LAPIC_VERSION); 785 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 786 787 /* Initialize the TPR to allow all interrupts. */ 788 lapic_set_tpr(0); 789 790 /* Setup spurious vector and enable the local APIC. */ 791 lapic_enable(); 792 793 /* Program LINT[01] LVT entries. */ 794 lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, 795 lapic_read32(LAPIC_LVT_LINT0))); 796 lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, 797 lapic_read32(LAPIC_LVT_LINT1))); 798 799 /* Program the PMC LVT entry if present. */ 800 if (maxlvt >= APIC_LVT_PMC) { 801 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 802 LAPIC_LVT_PCINT)); 803 } 804 805 /* Program timer LVT. */ 806 la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, 807 lapic_read32(LAPIC_LVT_TIMER)); 808 la->lvt_timer_last = la->lvt_timer_base; 809 lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); 810 811 /* Calibrate the timer parameters using BSP. */ 812 if (boot && IS_BSP()) { 813 lapic_calibrate_initcount(la); 814 if (lapic_timer_tsc_deadline) 815 lapic_calibrate_deadline(la); 816 } 817 818 /* Setup the timer if configured. */ 819 if (la->la_timer_mode != LAT_MODE_UNDEF) { 820 KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", 821 lapic_id())); 822 switch (la->la_timer_mode) { 823 case LAT_MODE_PERIODIC: 824 lapic_timer_set_divisor(lapic_timer_divisor); 825 lapic_timer_periodic(la); 826 break; 827 case LAT_MODE_ONESHOT: 828 lapic_timer_set_divisor(lapic_timer_divisor); 829 lapic_timer_oneshot(la); 830 break; 831 case LAT_MODE_DEADLINE: 832 lapic_timer_deadline(la); 833 break; 834 default: 835 panic("corrupted la_timer_mode %p %d", la, 836 la->la_timer_mode); 837 } 838 } 839 840 /* Program error LVT and clear any existing errors. */ 841 lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, 842 lapic_read32(LAPIC_LVT_ERROR))); 843 lapic_write32(LAPIC_ESR, 0); 844 845 /* XXX: Thermal LVT */ 846 847 /* Program the CMCI LVT entry if present. */ 848 if (maxlvt >= APIC_LVT_CMCI) { 849 lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, 850 lapic_read32(LAPIC_LVT_CMCI))); 851 } 852 853 elvt_count = amd_read_elvt_count(); 854 for (i = 0; i < elvt_count; i++) { 855 if (la->la_elvts[i].lvt_active) 856 lapic_write32(LAPIC_EXT_LVT0 + i, 857 elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); 858 } 859 860 intr_restore(saveintr); 861 } 862 863 static void 864 native_lapic_intrcnt(void *dummy __unused) 865 { 866 struct pcpu *pc; 867 struct lapic *la; 868 char buf[MAXCOMLEN + 1]; 869 870 /* If there are no APICs, skip this function. */ 871 if (lapics == NULL) 872 return; 873 874 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 875 la = &lapics[pc->pc_apic_id]; 876 if (!la->la_present) 877 continue; 878 879 snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); 880 intrcnt_add(buf, &la->la_timer_count); 881 } 882 } 883 SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, 884 NULL); 885 886 static void 887 native_lapic_reenable_pmc(void) 888 { 889 #ifdef HWPMC_HOOKS 890 uint32_t value; 891 892 value = lapic_read32(LAPIC_LVT_PCINT); 893 value &= ~APIC_LVT_M; 894 lapic_write32(LAPIC_LVT_PCINT, value); 895 #endif 896 } 897 898 #ifdef HWPMC_HOOKS 899 static void 900 lapic_update_pmc(void *dummy) 901 { 902 struct lapic *la; 903 904 la = &lapics[lapic_id()]; 905 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 906 lapic_read32(LAPIC_LVT_PCINT))); 907 } 908 #endif 909 910 static int 911 native_lapic_enable_pmc(void) 912 { 913 #ifdef HWPMC_HOOKS 914 u_int32_t maxlvt; 915 916 /* Fail if the local APIC is not present. */ 917 if (!x2apic_mode && lapic_map == NULL) 918 return (0); 919 920 /* Fail if the PMC LVT is not present. */ 921 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 922 if (maxlvt < APIC_LVT_PMC) 923 return (0); 924 925 lvts[APIC_LVT_PMC].lvt_masked = 0; 926 927 #ifdef EARLY_AP_STARTUP 928 MPASS(mp_ncpus == 1 || smp_started); 929 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 930 #else 931 #ifdef SMP 932 /* 933 * If hwpmc was loaded at boot time then the APs may not be 934 * started yet. In that case, don't forward the request to 935 * them as they will program the lvt when they start. 936 */ 937 if (smp_started) 938 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 939 else 940 #endif 941 lapic_update_pmc(NULL); 942 #endif 943 return (1); 944 #else 945 return (0); 946 #endif 947 } 948 949 static void 950 native_lapic_disable_pmc(void) 951 { 952 #ifdef HWPMC_HOOKS 953 u_int32_t maxlvt; 954 955 /* Fail if the local APIC is not present. */ 956 if (!x2apic_mode && lapic_map == NULL) 957 return; 958 959 /* Fail if the PMC LVT is not present. */ 960 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 961 if (maxlvt < APIC_LVT_PMC) 962 return; 963 964 lvts[APIC_LVT_PMC].lvt_masked = 1; 965 966 #ifdef SMP 967 /* The APs should always be started when hwpmc is unloaded. */ 968 KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); 969 #endif 970 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 971 #endif 972 } 973 974 static void 975 lapic_calibrate_initcount(struct lapic *la) 976 { 977 u_long value; 978 979 /* Start off with a divisor of 2 (power on reset default). */ 980 lapic_timer_divisor = 2; 981 /* Try to calibrate the local APIC timer. */ 982 do { 983 lapic_timer_set_divisor(lapic_timer_divisor); 984 lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT); 985 DELAY(1000000); 986 value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); 987 if (value != APIC_TIMER_MAX_COUNT) 988 break; 989 lapic_timer_divisor <<= 1; 990 } while (lapic_timer_divisor <= 128); 991 if (lapic_timer_divisor > 128) 992 panic("lapic: Divisor too big"); 993 if (bootverbose) { 994 printf("lapic: Divisor %lu, Frequency %lu Hz\n", 995 lapic_timer_divisor, value); 996 } 997 count_freq = value; 998 } 999 1000 static void 1001 lapic_calibrate_deadline(struct lapic *la __unused) 1002 { 1003 1004 if (bootverbose) { 1005 printf("lapic: deadline tsc mode, Frequency %ju Hz\n", 1006 (uintmax_t)tsc_freq); 1007 } 1008 } 1009 1010 static void 1011 lapic_change_mode(struct eventtimer *et, struct lapic *la, 1012 enum lat_timer_mode newmode) 1013 { 1014 1015 if (la->la_timer_mode == newmode) 1016 return; 1017 switch (newmode) { 1018 case LAT_MODE_PERIODIC: 1019 lapic_timer_set_divisor(lapic_timer_divisor); 1020 et->et_frequency = count_freq; 1021 break; 1022 case LAT_MODE_DEADLINE: 1023 et->et_frequency = tsc_freq; 1024 break; 1025 case LAT_MODE_ONESHOT: 1026 lapic_timer_set_divisor(lapic_timer_divisor); 1027 et->et_frequency = count_freq; 1028 break; 1029 default: 1030 panic("lapic_change_mode %d", newmode); 1031 } 1032 la->la_timer_mode = newmode; 1033 et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; 1034 et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; 1035 } 1036 1037 static int 1038 lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) 1039 { 1040 struct lapic *la; 1041 1042 la = &lapics[PCPU_GET(apic_id)]; 1043 if (period != 0) { 1044 lapic_change_mode(et, la, LAT_MODE_PERIODIC); 1045 la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 1046 32; 1047 lapic_timer_periodic(la); 1048 } else if (lapic_timer_tsc_deadline) { 1049 lapic_change_mode(et, la, LAT_MODE_DEADLINE); 1050 la->la_timer_period = (et->et_frequency * first) >> 32; 1051 lapic_timer_deadline(la); 1052 } else { 1053 lapic_change_mode(et, la, LAT_MODE_ONESHOT); 1054 la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 1055 32; 1056 lapic_timer_oneshot(la); 1057 } 1058 return (0); 1059 } 1060 1061 static int 1062 lapic_et_stop(struct eventtimer *et) 1063 { 1064 struct lapic *la; 1065 1066 la = &lapics[PCPU_GET(apic_id)]; 1067 lapic_timer_stop(la); 1068 la->la_timer_mode = LAT_MODE_UNDEF; 1069 return (0); 1070 } 1071 1072 static void 1073 native_lapic_disable(void) 1074 { 1075 uint32_t value; 1076 1077 /* Software disable the local APIC. */ 1078 value = lapic_read32(LAPIC_SVR); 1079 value &= ~APIC_SVR_SWEN; 1080 lapic_write32(LAPIC_SVR, value); 1081 } 1082 1083 static void 1084 lapic_enable(void) 1085 { 1086 uint32_t value; 1087 1088 /* Program the spurious vector to enable the local APIC. */ 1089 value = lapic_read32(LAPIC_SVR); 1090 value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); 1091 value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; 1092 if (lapic_eoi_suppression) 1093 value |= APIC_SVR_EOI_SUPPRESSION; 1094 lapic_write32(LAPIC_SVR, value); 1095 } 1096 1097 /* Reset the local APIC on the BSP during resume. */ 1098 static void 1099 lapic_resume(struct pic *pic, bool suspend_cancelled) 1100 { 1101 1102 lapic_setup(0); 1103 } 1104 1105 static int 1106 native_lapic_id(void) 1107 { 1108 uint32_t v; 1109 1110 KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); 1111 v = lapic_read32(LAPIC_ID); 1112 if (!x2apic_mode) 1113 v >>= APIC_ID_SHIFT; 1114 return (v); 1115 } 1116 1117 static int 1118 native_lapic_intr_pending(u_int vector) 1119 { 1120 uint32_t irr; 1121 1122 /* 1123 * The IRR registers are an array of registers each of which 1124 * only describes 32 interrupts in the low 32 bits. Thus, we 1125 * divide the vector by 32 to get the register index. 1126 * Finally, we modulus the vector by 32 to determine the 1127 * individual bit to test. 1128 */ 1129 irr = lapic_read32(LAPIC_IRR0 + vector / 32); 1130 return (irr & 1 << (vector % 32)); 1131 } 1132 1133 static void 1134 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) 1135 { 1136 struct lapic *la; 1137 1138 KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", 1139 __func__, apic_id)); 1140 KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", 1141 __func__, cluster)); 1142 KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, 1143 ("%s: intra cluster id %u too big", __func__, cluster_id)); 1144 la = &lapics[apic_id]; 1145 la->la_cluster = cluster; 1146 la->la_cluster_id = cluster_id; 1147 } 1148 1149 static int 1150 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) 1151 { 1152 1153 if (pin > APIC_LVT_MAX) 1154 return (EINVAL); 1155 if (apic_id == APIC_ID_ALL) { 1156 lvts[pin].lvt_masked = masked; 1157 if (bootverbose) 1158 printf("lapic:"); 1159 } else { 1160 KASSERT(lapics[apic_id].la_present, 1161 ("%s: missing APIC %u", __func__, apic_id)); 1162 lapics[apic_id].la_lvts[pin].lvt_masked = masked; 1163 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1164 if (bootverbose) 1165 printf("lapic%u:", apic_id); 1166 } 1167 if (bootverbose) 1168 printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); 1169 return (0); 1170 } 1171 1172 static int 1173 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1174 { 1175 struct lvt *lvt; 1176 1177 if (pin > APIC_LVT_MAX) 1178 return (EINVAL); 1179 if (apic_id == APIC_ID_ALL) { 1180 lvt = &lvts[pin]; 1181 if (bootverbose) 1182 printf("lapic:"); 1183 } else { 1184 KASSERT(lapics[apic_id].la_present, 1185 ("%s: missing APIC %u", __func__, apic_id)); 1186 lvt = &lapics[apic_id].la_lvts[pin]; 1187 lvt->lvt_active = 1; 1188 if (bootverbose) 1189 printf("lapic%u:", apic_id); 1190 } 1191 lvt->lvt_mode = mode; 1192 switch (mode) { 1193 case APIC_LVT_DM_NMI: 1194 case APIC_LVT_DM_SMI: 1195 case APIC_LVT_DM_INIT: 1196 case APIC_LVT_DM_EXTINT: 1197 lvt->lvt_edgetrigger = 1; 1198 lvt->lvt_activehi = 1; 1199 if (mode == APIC_LVT_DM_EXTINT) 1200 lvt->lvt_masked = 1; 1201 else 1202 lvt->lvt_masked = 0; 1203 break; 1204 default: 1205 panic("Unsupported delivery mode: 0x%x\n", mode); 1206 } 1207 if (bootverbose) { 1208 printf(" Routing "); 1209 switch (mode) { 1210 case APIC_LVT_DM_NMI: 1211 printf("NMI"); 1212 break; 1213 case APIC_LVT_DM_SMI: 1214 printf("SMI"); 1215 break; 1216 case APIC_LVT_DM_INIT: 1217 printf("INIT"); 1218 break; 1219 case APIC_LVT_DM_EXTINT: 1220 printf("ExtINT"); 1221 break; 1222 } 1223 printf(" -> LINT%u\n", pin); 1224 } 1225 return (0); 1226 } 1227 1228 static int 1229 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1230 { 1231 1232 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1233 return (EINVAL); 1234 if (apic_id == APIC_ID_ALL) { 1235 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1236 if (bootverbose) 1237 printf("lapic:"); 1238 } else { 1239 KASSERT(lapics[apic_id].la_present, 1240 ("%s: missing APIC %u", __func__, apic_id)); 1241 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1242 lapics[apic_id].la_lvts[pin].lvt_activehi = 1243 (pol == INTR_POLARITY_HIGH); 1244 if (bootverbose) 1245 printf("lapic%u:", apic_id); 1246 } 1247 if (bootverbose) 1248 printf(" LINT%u polarity: %s\n", pin, 1249 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1250 return (0); 1251 } 1252 1253 static int 1254 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1255 enum intr_trigger trigger) 1256 { 1257 1258 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1259 return (EINVAL); 1260 if (apic_id == APIC_ID_ALL) { 1261 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1262 if (bootverbose) 1263 printf("lapic:"); 1264 } else { 1265 KASSERT(lapics[apic_id].la_present, 1266 ("%s: missing APIC %u", __func__, apic_id)); 1267 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1268 (trigger == INTR_TRIGGER_EDGE); 1269 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1270 if (bootverbose) 1271 printf("lapic%u:", apic_id); 1272 } 1273 if (bootverbose) 1274 printf(" LINT%u trigger: %s\n", pin, 1275 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1276 return (0); 1277 } 1278 1279 /* 1280 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1281 * the passed in vector. 1282 */ 1283 static void 1284 lapic_set_tpr(u_int vector) 1285 { 1286 #ifdef CHEAP_TPR 1287 lapic_write32(LAPIC_TPR, vector); 1288 #else 1289 uint32_t tpr; 1290 1291 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1292 tpr |= vector; 1293 lapic_write32(LAPIC_TPR, tpr); 1294 #endif 1295 } 1296 1297 static void 1298 native_lapic_eoi(void) 1299 { 1300 1301 lapic_write32_nofence(LAPIC_EOI, 0); 1302 } 1303 1304 void 1305 lapic_handle_intr(int vector, struct trapframe *frame) 1306 { 1307 struct intsrc *isrc; 1308 1309 /* The frame may have been written into a poisoned region. */ 1310 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1311 1312 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1313 vector)); 1314 intr_execute_handlers(isrc, frame); 1315 } 1316 1317 void 1318 lapic_handle_timer(struct trapframe *frame) 1319 { 1320 struct lapic *la; 1321 struct trapframe *oldframe; 1322 struct thread *td; 1323 1324 /* Send EOI first thing. */ 1325 lapic_eoi(); 1326 1327 /* The frame may have been written into a poisoned region. */ 1328 kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); 1329 1330 #if defined(SMP) && !defined(SCHED_ULE) 1331 /* 1332 * Don't do any accounting for the disabled HTT cores, since it 1333 * will provide misleading numbers for the userland. 1334 * 1335 * No locking is necessary here, since even if we lose the race 1336 * when hlt_cpus_mask changes it is not a big deal, really. 1337 * 1338 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask 1339 * and unlike other schedulers it actually schedules threads to 1340 * those CPUs. 1341 */ 1342 if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) 1343 return; 1344 #endif 1345 1346 /* Look up our local APIC structure for the tick counters. */ 1347 la = &lapics[PCPU_GET(apic_id)]; 1348 (*la->la_timer_count)++; 1349 critical_enter(); 1350 if (lapic_et.et_active) { 1351 td = curthread; 1352 td->td_intr_nesting_level++; 1353 oldframe = td->td_intr_frame; 1354 td->td_intr_frame = frame; 1355 lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); 1356 td->td_intr_frame = oldframe; 1357 td->td_intr_nesting_level--; 1358 } 1359 critical_exit(); 1360 } 1361 1362 static void 1363 lapic_timer_set_divisor(u_int divisor) 1364 { 1365 1366 KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); 1367 KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), 1368 ("lapic: invalid divisor %u", divisor)); 1369 lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); 1370 } 1371 1372 static void 1373 lapic_timer_oneshot(struct lapic *la) 1374 { 1375 uint32_t value; 1376 1377 value = la->lvt_timer_base; 1378 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1379 value |= APIC_LVTT_TM_ONE_SHOT; 1380 la->lvt_timer_last = value; 1381 lapic_write32(LAPIC_LVT_TIMER, value); 1382 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1383 } 1384 1385 static void 1386 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) 1387 { 1388 uint32_t value; 1389 1390 value = la->lvt_timer_base; 1391 value &= ~APIC_LVTT_TM; 1392 value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; 1393 la->lvt_timer_last = value; 1394 lapic_write32(LAPIC_LVT_TIMER, value); 1395 lapic_write32(LAPIC_ICR_TIMER, count); 1396 } 1397 1398 static void 1399 lapic_timer_periodic(struct lapic *la) 1400 { 1401 uint32_t value; 1402 1403 value = la->lvt_timer_base; 1404 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1405 value |= APIC_LVTT_TM_PERIODIC; 1406 la->lvt_timer_last = value; 1407 lapic_write32(LAPIC_LVT_TIMER, value); 1408 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1409 } 1410 1411 static void 1412 lapic_timer_deadline(struct lapic *la) 1413 { 1414 uint32_t value; 1415 1416 value = la->lvt_timer_base; 1417 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1418 value |= APIC_LVTT_TM_TSCDLT; 1419 if (value != la->lvt_timer_last) { 1420 la->lvt_timer_last = value; 1421 lapic_write32_nofence(LAPIC_LVT_TIMER, value); 1422 if (!x2apic_mode) 1423 mfence(); 1424 } 1425 wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); 1426 } 1427 1428 static void 1429 lapic_timer_stop(struct lapic *la) 1430 { 1431 uint32_t value; 1432 1433 if (la->la_timer_mode == LAT_MODE_DEADLINE) { 1434 wrmsr(MSR_TSC_DEADLINE, 0); 1435 mfence(); 1436 } else { 1437 value = la->lvt_timer_base; 1438 value &= ~APIC_LVTT_TM; 1439 value |= APIC_LVT_M; 1440 la->lvt_timer_last = value; 1441 lapic_write32(LAPIC_LVT_TIMER, value); 1442 } 1443 } 1444 1445 void 1446 lapic_handle_cmc(void) 1447 { 1448 1449 lapic_eoi(); 1450 cmc_intr(); 1451 } 1452 1453 /* 1454 * Called from the mca_init() to activate the CMC interrupt if this CPU is 1455 * responsible for monitoring any MC banks for CMC events. Since mca_init() 1456 * is called prior to lapic_setup() during boot, this just needs to unmask 1457 * this CPU's LVT_CMCI entry. 1458 */ 1459 static void 1460 native_lapic_enable_cmc(void) 1461 { 1462 u_int apic_id; 1463 1464 #ifdef DEV_ATPIC 1465 if (!x2apic_mode && lapic_map == NULL) 1466 return; 1467 #endif 1468 apic_id = PCPU_GET(apic_id); 1469 KASSERT(lapics[apic_id].la_present, 1470 ("%s: missing APIC %u", __func__, apic_id)); 1471 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; 1472 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; 1473 if (bootverbose) 1474 printf("lapic%u: CMCI unmasked\n", apic_id); 1475 } 1476 1477 static int 1478 native_lapic_enable_mca_elvt(void) 1479 { 1480 u_int apic_id; 1481 uint32_t value; 1482 int elvt_count; 1483 1484 #ifdef DEV_ATPIC 1485 if (lapic_map == NULL) 1486 return (-1); 1487 #endif 1488 1489 apic_id = PCPU_GET(apic_id); 1490 KASSERT(lapics[apic_id].la_present, 1491 ("%s: missing APIC %u", __func__, apic_id)); 1492 elvt_count = amd_read_elvt_count(); 1493 if (elvt_count <= APIC_ELVT_MCA) 1494 return (-1); 1495 1496 value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); 1497 if ((value & APIC_LVT_M) == 0) { 1498 if (bootverbose) 1499 printf("AMD MCE Thresholding Extended LVT is already active\n"); 1500 return (APIC_ELVT_MCA); 1501 } 1502 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; 1503 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; 1504 if (bootverbose) 1505 printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); 1506 return (APIC_ELVT_MCA); 1507 } 1508 1509 void 1510 lapic_handle_error(void) 1511 { 1512 uint32_t esr; 1513 1514 /* 1515 * Read the contents of the error status register. Write to 1516 * the register first before reading from it to force the APIC 1517 * to update its value to indicate any errors that have 1518 * occurred since the previous write to the register. 1519 */ 1520 lapic_write32(LAPIC_ESR, 0); 1521 esr = lapic_read32(LAPIC_ESR); 1522 1523 printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); 1524 lapic_eoi(); 1525 } 1526 1527 static u_int 1528 native_apic_cpuid(u_int apic_id) 1529 { 1530 #ifdef SMP 1531 return apic_cpuids[apic_id]; 1532 #else 1533 return 0; 1534 #endif 1535 } 1536 1537 /* Request a free IDT vector to be used by the specified IRQ. */ 1538 static u_int 1539 native_apic_alloc_vector(u_int apic_id, u_int irq) 1540 { 1541 u_int vector; 1542 1543 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1544 1545 /* 1546 * Search for a free vector. Currently we just use a very simple 1547 * algorithm to find the first free vector. 1548 */ 1549 mtx_lock_spin(&icu_lock); 1550 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1551 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) 1552 continue; 1553 lapics[apic_id].la_ioint_irqs[vector] = irq; 1554 mtx_unlock_spin(&icu_lock); 1555 return (vector + APIC_IO_INTS); 1556 } 1557 mtx_unlock_spin(&icu_lock); 1558 return (0); 1559 } 1560 1561 /* 1562 * Request 'count' free contiguous IDT vectors to be used by 'count' 1563 * IRQs. 'count' must be a power of two and the vectors will be 1564 * aligned on a boundary of 'align'. If the request cannot be 1565 * satisfied, 0 is returned. 1566 */ 1567 static u_int 1568 native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) 1569 { 1570 u_int first, run, vector; 1571 1572 KASSERT(powerof2(count), ("bad count")); 1573 KASSERT(powerof2(align), ("bad align")); 1574 KASSERT(align >= count, ("align < count")); 1575 #ifdef INVARIANTS 1576 for (run = 0; run < count; run++) 1577 KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", 1578 irqs[run], run)); 1579 #endif 1580 1581 /* 1582 * Search for 'count' free vectors. As with apic_alloc_vector(), 1583 * this just uses a simple first fit algorithm. 1584 */ 1585 run = 0; 1586 first = 0; 1587 mtx_lock_spin(&icu_lock); 1588 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1589 /* Vector is in use, end run. */ 1590 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { 1591 run = 0; 1592 first = 0; 1593 continue; 1594 } 1595 1596 /* Start a new run if run == 0 and vector is aligned. */ 1597 if (run == 0) { 1598 if ((vector & (align - 1)) != 0) 1599 continue; 1600 first = vector; 1601 } 1602 run++; 1603 1604 /* Keep looping if the run isn't long enough yet. */ 1605 if (run < count) 1606 continue; 1607 1608 /* Found a run, assign IRQs and return the first vector. */ 1609 for (vector = 0; vector < count; vector++) 1610 lapics[apic_id].la_ioint_irqs[first + vector] = 1611 irqs[vector]; 1612 mtx_unlock_spin(&icu_lock); 1613 return (first + APIC_IO_INTS); 1614 } 1615 mtx_unlock_spin(&icu_lock); 1616 printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); 1617 return (0); 1618 } 1619 1620 /* 1621 * Enable a vector for a particular apic_id. Since all lapics share idt 1622 * entries and ioint_handlers this enables the vector on all lapics. lapics 1623 * which do not have the vector configured would report spurious interrupts 1624 * should it fire. 1625 */ 1626 static void 1627 native_apic_enable_vector(u_int apic_id, u_int vector) 1628 { 1629 1630 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1631 KASSERT(ioint_handlers[vector / 32] != NULL, 1632 ("No ISR handler for vector %u", vector)); 1633 #ifdef KDTRACE_HOOKS 1634 KASSERT(vector != IDT_DTRACE_RET, 1635 ("Attempt to overwrite DTrace entry")); 1636 #endif 1637 setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32], 1638 SDT_APIC, SEL_KPL, GSEL_APIC); 1639 } 1640 1641 static void 1642 native_apic_disable_vector(u_int apic_id, u_int vector) 1643 { 1644 1645 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1646 #ifdef KDTRACE_HOOKS 1647 KASSERT(vector != IDT_DTRACE_RET, 1648 ("Attempt to overwrite DTrace entry")); 1649 #endif 1650 KASSERT(ioint_handlers[vector / 32] != NULL, 1651 ("No ISR handler for vector %u", vector)); 1652 #ifdef notyet 1653 /* 1654 * We can not currently clear the idt entry because other cpus 1655 * may have a valid vector at this offset. 1656 */ 1657 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 1658 SEL_KPL, GSEL_APIC); 1659 #endif 1660 } 1661 1662 /* Release an APIC vector when it's no longer in use. */ 1663 static void 1664 native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) 1665 { 1666 struct thread *td; 1667 1668 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1669 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1670 ("Vector %u does not map to an IRQ line", vector)); 1671 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1672 KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == 1673 irq, ("IRQ mismatch")); 1674 #ifdef KDTRACE_HOOKS 1675 KASSERT(vector != IDT_DTRACE_RET, 1676 ("Attempt to overwrite DTrace entry")); 1677 #endif 1678 1679 /* 1680 * Bind us to the cpu that owned the vector before freeing it so 1681 * we don't lose an interrupt delivery race. 1682 */ 1683 td = curthread; 1684 if (!rebooting) { 1685 thread_lock(td); 1686 if (sched_is_bound(td)) 1687 panic("apic_free_vector: Thread already bound.\n"); 1688 sched_bind(td, apic_cpuid(apic_id)); 1689 thread_unlock(td); 1690 } 1691 mtx_lock_spin(&icu_lock); 1692 lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; 1693 mtx_unlock_spin(&icu_lock); 1694 if (!rebooting) { 1695 thread_lock(td); 1696 sched_unbind(td); 1697 thread_unlock(td); 1698 } 1699 } 1700 1701 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ 1702 static u_int 1703 apic_idt_to_irq(u_int apic_id, u_int vector) 1704 { 1705 int irq; 1706 1707 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1708 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1709 ("Vector %u does not map to an IRQ line", vector)); 1710 #ifdef KDTRACE_HOOKS 1711 KASSERT(vector != IDT_DTRACE_RET, 1712 ("Attempt to overwrite DTrace entry")); 1713 #endif 1714 irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; 1715 if (irq < 0) 1716 irq = 0; 1717 return (irq); 1718 } 1719 1720 #ifdef DDB 1721 /* 1722 * Dump data about APIC IDT vector mappings. 1723 */ 1724 DB_SHOW_COMMAND(apic, db_show_apic) 1725 { 1726 struct intsrc *isrc; 1727 int i, verbose; 1728 u_int apic_id; 1729 u_int irq; 1730 1731 if (strcmp(modif, "vv") == 0) 1732 verbose = 2; 1733 else if (strcmp(modif, "v") == 0) 1734 verbose = 1; 1735 else 1736 verbose = 0; 1737 for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { 1738 if (lapics[apic_id].la_present == 0) 1739 continue; 1740 db_printf("Interrupts bound to lapic %u\n", apic_id); 1741 for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { 1742 irq = lapics[apic_id].la_ioint_irqs[i]; 1743 if (irq == IRQ_FREE || irq == IRQ_SYSCALL) 1744 continue; 1745 #ifdef KDTRACE_HOOKS 1746 if (irq == IRQ_DTRACE_RET) 1747 continue; 1748 #endif 1749 #ifdef XENHVM 1750 if (irq == IRQ_EVTCHN) 1751 continue; 1752 #endif 1753 db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); 1754 if (irq == IRQ_TIMER) 1755 db_printf("lapic timer\n"); 1756 else if (irq < num_io_irqs) { 1757 isrc = intr_lookup_source(irq); 1758 if (isrc == NULL || verbose == 0) 1759 db_printf("IRQ %u\n", irq); 1760 else 1761 db_dump_intr_event(isrc->is_event, 1762 verbose == 2); 1763 } else 1764 db_printf("IRQ %u ???\n", irq); 1765 } 1766 } 1767 } 1768 1769 static void 1770 dump_mask(const char *prefix, uint32_t v, int base) 1771 { 1772 int i, first; 1773 1774 first = 1; 1775 for (i = 0; i < 32; i++) 1776 if (v & (1 << i)) { 1777 if (first) { 1778 db_printf("%s:", prefix); 1779 first = 0; 1780 } 1781 db_printf(" %02x", base + i); 1782 } 1783 if (!first) 1784 db_printf("\n"); 1785 } 1786 1787 /* Show info from the lapic regs for this CPU. */ 1788 DB_SHOW_COMMAND(lapic, db_show_lapic) 1789 { 1790 uint32_t v; 1791 1792 db_printf("lapic ID = %d\n", lapic_id()); 1793 v = lapic_read32(LAPIC_VERSION); 1794 db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, 1795 v & 0xf); 1796 db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); 1797 v = lapic_read32(LAPIC_SVR); 1798 db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, 1799 v & APIC_SVR_ENABLE ? "enabled" : "disabled"); 1800 db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); 1801 1802 #define dump_field(prefix, regn, index) \ 1803 dump_mask(__XSTRING(prefix ## index), \ 1804 lapic_read32(LAPIC_ ## regn ## index), \ 1805 index * 32) 1806 1807 db_printf("In-service Interrupts:\n"); 1808 dump_field(isr, ISR, 0); 1809 dump_field(isr, ISR, 1); 1810 dump_field(isr, ISR, 2); 1811 dump_field(isr, ISR, 3); 1812 dump_field(isr, ISR, 4); 1813 dump_field(isr, ISR, 5); 1814 dump_field(isr, ISR, 6); 1815 dump_field(isr, ISR, 7); 1816 1817 db_printf("TMR Interrupts:\n"); 1818 dump_field(tmr, TMR, 0); 1819 dump_field(tmr, TMR, 1); 1820 dump_field(tmr, TMR, 2); 1821 dump_field(tmr, TMR, 3); 1822 dump_field(tmr, TMR, 4); 1823 dump_field(tmr, TMR, 5); 1824 dump_field(tmr, TMR, 6); 1825 dump_field(tmr, TMR, 7); 1826 1827 db_printf("IRR Interrupts:\n"); 1828 dump_field(irr, IRR, 0); 1829 dump_field(irr, IRR, 1); 1830 dump_field(irr, IRR, 2); 1831 dump_field(irr, IRR, 3); 1832 dump_field(irr, IRR, 4); 1833 dump_field(irr, IRR, 5); 1834 dump_field(irr, IRR, 6); 1835 dump_field(irr, IRR, 7); 1836 1837 #undef dump_field 1838 } 1839 #endif 1840 1841 /* 1842 * APIC probing support code. This includes code to manage enumerators. 1843 */ 1844 1845 static SLIST_HEAD(, apic_enumerator) enumerators = 1846 SLIST_HEAD_INITIALIZER(enumerators); 1847 static struct apic_enumerator *best_enum; 1848 1849 void 1850 apic_register_enumerator(struct apic_enumerator *enumerator) 1851 { 1852 #ifdef INVARIANTS 1853 struct apic_enumerator *apic_enum; 1854 1855 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1856 if (apic_enum == enumerator) 1857 panic("%s: Duplicate register of %s", __func__, 1858 enumerator->apic_name); 1859 } 1860 #endif 1861 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1862 } 1863 1864 /* 1865 * We have to look for CPU's very, very early because certain subsystems 1866 * want to know how many CPU's we have extremely early on in the boot 1867 * process. 1868 */ 1869 static void 1870 apic_init(void *dummy __unused) 1871 { 1872 struct apic_enumerator *enumerator; 1873 int retval, best; 1874 1875 /* We only support built in local APICs. */ 1876 if (!(cpu_feature & CPUID_APIC)) 1877 return; 1878 1879 /* Don't probe if APIC mode is disabled. */ 1880 if (resource_disabled("apic", 0)) 1881 return; 1882 1883 /* Probe all the enumerators to find the best match. */ 1884 best_enum = NULL; 1885 best = 0; 1886 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1887 retval = enumerator->apic_probe(); 1888 if (retval > 0) 1889 continue; 1890 if (best_enum == NULL || best < retval) { 1891 best_enum = enumerator; 1892 best = retval; 1893 } 1894 } 1895 if (best_enum == NULL) { 1896 if (bootverbose) 1897 printf("APIC: Could not find any APICs.\n"); 1898 #ifndef DEV_ATPIC 1899 panic("running without device atpic requires a local APIC"); 1900 #endif 1901 return; 1902 } 1903 1904 if (bootverbose) 1905 printf("APIC: Using the %s enumerator.\n", 1906 best_enum->apic_name); 1907 1908 #ifdef I686_CPU 1909 /* 1910 * To work around an errata, we disable the local APIC on some 1911 * CPUs during early startup. We need to turn the local APIC back 1912 * on on such CPUs now. 1913 */ 1914 ppro_reenable_apic(); 1915 #endif 1916 1917 /* Probe the CPU's in the system. */ 1918 retval = best_enum->apic_probe_cpus(); 1919 if (retval != 0) 1920 printf("%s: Failed to probe CPUs: returned %d\n", 1921 best_enum->apic_name, retval); 1922 1923 } 1924 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1925 1926 /* 1927 * Setup the local APIC. We have to do this prior to starting up the APs 1928 * in the SMP case. 1929 */ 1930 static void 1931 apic_setup_local(void *dummy __unused) 1932 { 1933 int retval; 1934 1935 if (best_enum == NULL) 1936 return; 1937 1938 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1939 M_WAITOK | M_ZERO); 1940 1941 /* Initialize the local APIC. */ 1942 retval = best_enum->apic_setup_local(); 1943 if (retval != 0) 1944 printf("%s: Failed to setup the local APIC: returned %d\n", 1945 best_enum->apic_name, retval); 1946 } 1947 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1948 1949 /* 1950 * Setup the I/O APICs. 1951 */ 1952 static void 1953 apic_setup_io(void *dummy __unused) 1954 { 1955 int retval; 1956 1957 if (best_enum == NULL) 1958 return; 1959 1960 /* 1961 * Local APIC must be registered before other PICs and pseudo PICs 1962 * for proper suspend/resume order. 1963 */ 1964 intr_register_pic(&lapic_pic); 1965 1966 retval = best_enum->apic_setup_io(); 1967 if (retval != 0) 1968 printf("%s: Failed to setup I/O APICs: returned %d\n", 1969 best_enum->apic_name, retval); 1970 1971 /* 1972 * Finish setting up the local APIC on the BSP once we know 1973 * how to properly program the LINT pins. In particular, this 1974 * enables the EOI suppression mode, if LAPIC supports it and 1975 * user did not disable the mode. 1976 */ 1977 lapic_setup(1); 1978 if (bootverbose) 1979 lapic_dump("BSP"); 1980 1981 /* Enable the MSI "pic". */ 1982 msi_init(); 1983 1984 #ifdef XENHVM 1985 xen_intr_alloc_irqs(); 1986 #endif 1987 } 1988 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1989 1990 #ifdef SMP 1991 /* 1992 * Inter Processor Interrupt functions. The lapic_ipi_*() functions are 1993 * private to the MD code. The public interface for the rest of the 1994 * kernel is defined in mp_machdep.c. 1995 */ 1996 1997 /* 1998 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1999 * wait forever. 2000 */ 2001 static int 2002 native_lapic_ipi_wait(int delay) 2003 { 2004 uint64_t rx; 2005 2006 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 2007 if (x2apic_mode) 2008 return (1); 2009 2010 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 2011 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 2012 APIC_DELSTAT_IDLE) 2013 return (1); 2014 ia32_pause(); 2015 } 2016 return (0); 2017 } 2018 2019 static void 2020 native_lapic_ipi_raw(register_t icrlo, u_int dest) 2021 { 2022 uint32_t icrhi; 2023 2024 /* XXX: Need more sanity checking of icrlo? */ 2025 KASSERT(x2apic_mode || lapic_map != NULL, 2026 ("%s called too early", __func__)); 2027 KASSERT(x2apic_mode || 2028 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2029 ("%s: invalid dest field", __func__)); 2030 KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, 2031 ("%s: reserved bits set in ICR LO register", __func__)); 2032 2033 if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { 2034 if (x2apic_mode) 2035 icrhi = dest; 2036 else 2037 icrhi = dest << APIC_ID_SHIFT; 2038 lapic_write_icr(icrhi, icrlo); 2039 } else { 2040 lapic_write_icr_lo(icrlo); 2041 } 2042 } 2043 2044 #ifdef DETECT_DEADLOCK 2045 #define AFTER_SPIN 50 2046 #endif 2047 2048 static void 2049 native_lapic_ipi_vectored(u_int vector, int dest) 2050 { 2051 register_t icrlo, destfield; 2052 2053 KASSERT((vector & ~APIC_VECTOR_MASK) == 0, 2054 ("%s: invalid vector %d", __func__, vector)); 2055 2056 destfield = 0; 2057 switch (dest) { 2058 case APIC_IPI_DEST_SELF: 2059 if (x2apic_mode && vector < IPI_NMI_FIRST) { 2060 lapic_write_self_ipi(vector); 2061 return; 2062 } 2063 icrlo = APIC_DEST_SELF; 2064 break; 2065 case APIC_IPI_DEST_ALL: 2066 icrlo = APIC_DEST_ALLISELF; 2067 break; 2068 case APIC_IPI_DEST_OTHERS: 2069 icrlo = APIC_DEST_ALLESELF; 2070 break; 2071 default: 2072 icrlo = 0; 2073 KASSERT(x2apic_mode || 2074 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2075 ("%s: invalid destination 0x%x", __func__, dest)); 2076 destfield = dest; 2077 } 2078 2079 /* 2080 * NMI IPIs are just fake vectors used to send a NMI. Use special rules 2081 * regarding NMIs if passed, otherwise specify the vector. 2082 */ 2083 if (vector >= IPI_NMI_FIRST) 2084 icrlo |= APIC_DELMODE_NMI; 2085 else 2086 icrlo |= vector | APIC_DELMODE_FIXED; 2087 icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; 2088 2089 /* Wait for an earlier IPI to finish. */ 2090 if (!lapic_ipi_wait(lapic_ds_idle_timeout)) { 2091 if (KERNEL_PANICKED()) 2092 return; 2093 else 2094 panic("APIC: Previous IPI is stuck"); 2095 } 2096 2097 lapic_ipi_raw(icrlo, destfield); 2098 2099 #ifdef DETECT_DEADLOCK 2100 /* Wait for IPI to be delivered. */ 2101 if (!lapic_ipi_wait(AFTER_SPIN)) { 2102 #ifdef needsattention 2103 /* 2104 * XXX FIXME: 2105 * 2106 * The above function waits for the message to actually be 2107 * delivered. It breaks out after an arbitrary timeout 2108 * since the message should eventually be delivered (at 2109 * least in theory) and that if it wasn't we would catch 2110 * the failure with the check above when the next IPI is 2111 * sent. 2112 * 2113 * We could skip this wait entirely, EXCEPT it probably 2114 * protects us from other routines that assume that the 2115 * message was delivered and acted upon when this function 2116 * returns. 2117 */ 2118 printf("APIC: IPI might be stuck\n"); 2119 #else /* !needsattention */ 2120 /* Wait until mesage is sent without a timeout. */ 2121 while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) 2122 ia32_pause(); 2123 #endif /* needsattention */ 2124 } 2125 #endif /* DETECT_DEADLOCK */ 2126 } 2127 2128 #endif /* SMP */ 2129 2130 /* 2131 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally 2132 * visible. 2133 * 2134 * Consider the case where an IPI is generated immediately after allocation: 2135 * vector = lapic_ipi_alloc(ipifunc); 2136 * ipi_selected(other_cpus, vector); 2137 * 2138 * In xAPIC mode a write to ICR_LO has serializing semantics because the 2139 * APIC page is mapped as an uncached region. In x2APIC mode there is an 2140 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases 2141 * the IDT slot update is globally visible before the IPI is delivered. 2142 */ 2143 static int 2144 native_lapic_ipi_alloc(inthand_t *ipifunc) 2145 { 2146 struct gate_descriptor *ip; 2147 long func; 2148 int idx, vector; 2149 2150 KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), 2151 ("invalid ipifunc %p", ipifunc)); 2152 2153 vector = -1; 2154 mtx_lock_spin(&icu_lock); 2155 for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { 2156 ip = &idt[idx]; 2157 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2158 #ifdef __i386__ 2159 func -= setidt_disp; 2160 #endif 2161 if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || 2162 (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { 2163 vector = idx; 2164 setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); 2165 break; 2166 } 2167 } 2168 mtx_unlock_spin(&icu_lock); 2169 return (vector); 2170 } 2171 2172 static void 2173 native_lapic_ipi_free(int vector) 2174 { 2175 struct gate_descriptor *ip; 2176 long func; 2177 2178 KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, 2179 ("%s: invalid vector %d", __func__, vector)); 2180 2181 mtx_lock_spin(&icu_lock); 2182 ip = &idt[vector]; 2183 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2184 #ifdef __i386__ 2185 func -= setidt_disp; 2186 #endif 2187 KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && 2188 func != (uintptr_t)&IDTVEC(rsvd_pti), 2189 ("invalid idtfunc %#lx", func)); 2190 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 2191 SEL_KPL, GSEL_APIC); 2192 mtx_unlock_spin(&icu_lock); 2193 } 2194