1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <x86/init.h>

#ifdef DDB
#include <sys/interrupt.h>
#include <ddb/ddb.h>
#endif

/*
 * The IDT gate type and the code selector used for gates differ between
 * amd64 and i386; hide the difference behind SDT_APIC/GSEL_APIC.
 */
#ifdef __amd64__
#define	SDT_APIC	SDT_SYSIGT
#define	GSEL_APIC	0
#else
#define	SDT_APIC	SDT_SYS386IGT
#define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
#endif

static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);

/*
 * I/O interrupts use non-negative IRQ values.  These values are used
 * to mark unused IDT entries or IDT entries reserved for a non-I/O
 * interrupt.
 */
#define	IRQ_FREE	-1
#define	IRQ_TIMER	-2
#define	IRQ_SYSCALL	-3
#define	IRQ_DTRACE_RET	-4
#define	IRQ_EVTCHN	-5

/* Operating mode of the local APIC timer on a given CPU. */
enum lat_timer_mode {
	LAT_MODE_UNDEF =	0,
	LAT_MODE_PERIODIC =	1,
	LAT_MODE_ONESHOT =	2,
	LAT_MODE_DEADLINE =	3,
};

/*
 * Support for local APICs.
 Local APICs manage interrupts on each
 * individual processor as opposed to I/O APICs which receive interrupts
 * from I/O devices and then forward them on to the local APICs.
 *
 * Local APICs can also send interrupts to each other thus providing the
 * mechanism for IPIs.
 */

/* Software description of one local vector table (LVT) entry. */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 1 == edge triggered */
	u_int lvt_activehi:1;		/* 1 == active-high polarity */
	u_int lvt_masked:1;		/* 1 == interrupt masked */
	u_int lvt_active:1;		/* 1 == overrides the global default */
	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode */
	u_int lvt_vector:8;		/* IDT vector for fixed delivery */
};

/* Per-CPU local APIC state, indexed by APIC ID via 'lapics'. */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];
	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* AMD extended LVTs */
	u_int la_id:8;			/* local APIC ID */
	u_int la_cluster:4;		/* logical-mode cluster */
	u_int la_cluster_id:2;		/* ID within the cluster */
	u_int la_present:1;		/* this APIC exists */
	u_long *la_timer_count;		/* timer interrupt counter */
	uint64_t la_timer_period;	/* programmed timer period */
	enum lat_timer_mode la_timer_mode;
	uint32_t lvt_timer_base;	/* timer LVT template */
	uint32_t lvt_timer_last;	/* last value written to timer LVT */
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;

/* Global defaults for local APIC LVT entries. */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};

/* Global defaults for AMD local APIC ELVT entries.
 */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/* I/O interrupt entry points, one handler per group of 32 vectors. */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};

/* PTI (page-table isolation) variants of the above entry points. */
static inthand_t *ioint_pti_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
};

/* Timer divide configuration register values, in power-of-two order. */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;	/* xAPIC MMIO window; NULL in x2APIC mode */
vm_paddr_t lapic_paddr;		/* physical address of the register page */
int x2apic_mode;
int lapic_eoi_suppression;
static int lapic_timer_tsc_deadline;
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
static uint64_t lapic_ipi_wait_mult;	/* busy-loop iterations per usec */
#endif
unsigned int max_apic_id;

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");

static void lapic_calibrate_initcount(struct lapic *la);
static void lapic_calibrate_deadline(struct lapic *la);

/*
 * Read a 32-bit local APIC register: via MSR in x2APIC mode, via the
 * memory-mapped register window otherwise.
 */
static uint32_t
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/* Write a 32-bit local APIC register, with ordering fences in x2APIC mode. */
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		/*
		 * Fence before the MSR write: the x2APIC WRMSR is not
		 * serializing, so earlier stores and loads must be made
		 * visible before the APIC operation takes effect.
		 */
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

/* As lapic_write32(), but without the x2APIC-mode ordering fences. */
static void
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
/* Read the full 64-bit interrupt command register (ICR). */
static uint64_t
lapic_read_icr(void)
{
	uint64_t v;
	uint32_t vhi, vlo;

	if (x2apic_mode) {
		/* x2APIC exposes the whole ICR through a single MSR. */
		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
	} else {
		vhi = lapic_read32(LAPIC_ICR_HI);
		vlo = lapic_read32(LAPIC_ICR_LO);
		v = ((uint64_t)vhi << 32) | vlo;
	}
	return (v);
}

/* Read the low half of the ICR (delivery status lives here). */
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write the ICR.  In xAPIC mode the high word must be written first
 * because writing the low word is what dispatches the IPI.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
#endif /* SMP */

/* Switch this CPU's local APIC into (enabled) x2APIC mode. */
static void
native_lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* Report whether the local APIC is currently enabled in x2APIC mode. */
static bool
native_lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
	    (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void lapic_enable(void);
static void lapic_resume(struct pic *pic, bool suspend_cancelled);
static void lapic_timer_oneshot(struct lapic *);
static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void lapic_timer_periodic(struct lapic *);
static void lapic_timer_deadline(struct lapic *);
static void lapic_timer_stop(struct lapic *);
static void lapic_timer_set_divisor(u_int divisor);
static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int lapic_et_start(struct eventtimer *et,
    sbintime_t first, sbintime_t period);
static int lapic_et_stop(struct eventtimer *et);
static u_int apic_idt_to_irq(u_int apic_id, u_int vector);
static void lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/* Forward declarations for apic_ops */
static void native_lapic_create(u_int apic_id, int boot_cpu);
static void native_lapic_init(vm_paddr_t addr);
static void native_lapic_xapic_mode(void);
static void native_lapic_setup(int boot);
static void native_lapic_dump(const char *str);
static void native_lapic_disable(void);
static void native_lapic_eoi(void);
static int native_lapic_id(void);
static int native_lapic_intr_pending(u_int vector);
static u_int native_apic_cpuid(u_int apic_id);
static u_int native_apic_alloc_vector(u_int apic_id, u_int irq);
static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
    u_int count, u_int align);
static void native_apic_disable_vector(u_int apic_id, u_int vector);
static void native_apic_enable_vector(u_int apic_id, u_int vector);
static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
static void native_lapic_set_logical_id(u_int apic_id, u_int cluster,
    u_int cluster_id);
static int native_lapic_enable_pmc(void);
static void native_lapic_disable_pmc(void);
static void native_lapic_reenable_pmc(void);
static void native_lapic_enable_cmc(void);
static int native_lapic_enable_mca_elvt(void);
static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
    u_char masked);
static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
    uint32_t mode);
static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
    enum intr_polarity pol);
static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
    enum intr_trigger trigger);
#ifdef SMP
static void native_lapic_ipi_raw(register_t icrlo, u_int dest);
static void native_lapic_ipi_vectored(u_int vector, int dest);
static int native_lapic_ipi_wait(int delay);
#endif /* SMP */
static int native_lapic_ipi_alloc(inthand_t *ipifunc);
static void native_lapic_ipi_free(int vector);

/* Dispatch table consumed by the machine-independent APIC interface. */
struct apic_ops apic_ops = {
	.create = native_lapic_create,
	.init = native_lapic_init,
	.xapic_mode = native_lapic_xapic_mode,
	.is_x2apic = native_lapic_is_x2apic,
	.setup = native_lapic_setup,
	.dump = native_lapic_dump,
	.disable = native_lapic_disable,
	.eoi = native_lapic_eoi,
	.id = native_lapic_id,
	.intr_pending = native_lapic_intr_pending,
	.set_logical_id = native_lapic_set_logical_id,
	.cpuid = native_apic_cpuid,
	.alloc_vector = native_apic_alloc_vector,
	.alloc_vectors = native_apic_alloc_vectors,
	.enable_vector = native_apic_enable_vector,
	.disable_vector = native_apic_disable_vector,
	.free_vector = native_apic_free_vector,
	.enable_pmc = native_lapic_enable_pmc,
	.disable_pmc = native_lapic_disable_pmc,
	.reenable_pmc = native_lapic_reenable_pmc,
	.enable_cmc = native_lapic_enable_cmc,
	.enable_mca_elvt = native_lapic_enable_mca_elvt,
#ifdef SMP
	.ipi_raw = native_lapic_ipi_raw,
	.ipi_vectored = native_lapic_ipi_vectored,
	.ipi_wait = native_lapic_ipi_wait,
#endif
	.ipi_alloc = native_lapic_ipi_alloc,
	.ipi_free = native_lapic_ipi_free,
	.set_lvt_mask = native_lapic_set_lvt_mask,
	.set_lvt_mode = native_lapic_set_lvt_mode,
	.set_lvt_polarity = native_lapic_set_lvt_polarity,
	.set_lvt_triggermode = native_lapic_set_lvt_triggermode,
};

/*
 * Encode the software LVT description 'lvt' into a hardware LVT register
 * value.  All fields managed here (mask, trigger mode, polarity, delivery
 * mode, vector) are cleared in 'value' first and rebuilt from 'lvt'.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
{

	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/* These delivery modes only support edge trigger. */
		if (!lvt->lvt_edgetrigger && bootverbose) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value &= ~APIC_LVT_TM;
		}
		/* Use a vector of 0.
 */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}

/*
 * Compute the LVT register value for pin 'pin', using the per-APIC
 * override when one is active and the global default otherwise.
 */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX,
	    ("%s: pin %u out of range", __func__, pin));
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	return (lvt_mode_impl(la, lvt, pin, value));
}

/* Compute the register value for AMD extended LVT entry 'idx'. */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local APIC and setup necessary interrupt vectors.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	int i;
	bool arat;

	/*
	 * Enable x2APIC mode if possible. Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		/* Registers are reached via MSRs; drop the MMIO pointer. */
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Register the event timer unless disabled via hints. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		/* Set if APIC timer runs in C3. */
		arat = (cpu_power_eax & CPUTPM1_ARAT);

		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer stops in deep C-states; lower the quality. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
				    "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}

/*
 * Create a local APIC instance.
620 */ 621 static void 622 native_lapic_create(u_int apic_id, int boot_cpu) 623 { 624 int i; 625 626 if (apic_id > max_apic_id) { 627 printf("APIC: Ignoring local APIC with ID %d\n", apic_id); 628 if (boot_cpu) 629 panic("Can't ignore BSP"); 630 return; 631 } 632 KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", 633 apic_id)); 634 635 /* 636 * Assume no local LVT overrides and a cluster of 0 and 637 * intra-cluster ID of 0. 638 */ 639 lapics[apic_id].la_present = 1; 640 lapics[apic_id].la_id = apic_id; 641 for (i = 0; i <= APIC_LVT_MAX; i++) { 642 lapics[apic_id].la_lvts[i] = lvts[i]; 643 lapics[apic_id].la_lvts[i].lvt_active = 0; 644 } 645 for (i = 0; i <= APIC_ELVT_MAX; i++) { 646 lapics[apic_id].la_elvts[i] = elvts[i]; 647 lapics[apic_id].la_elvts[i].lvt_active = 0; 648 } 649 for (i = 0; i <= APIC_NUM_IOINTS; i++) 650 lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; 651 lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; 652 lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = 653 IRQ_TIMER; 654 #ifdef KDTRACE_HOOKS 655 lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = 656 IRQ_DTRACE_RET; 657 #endif 658 #ifdef XENHVM 659 lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; 660 #endif 661 662 663 #ifdef SMP 664 cpu_add(apic_id, boot_cpu); 665 #endif 666 } 667 668 static inline uint32_t 669 amd_read_ext_features(void) 670 { 671 uint32_t version; 672 673 if (cpu_vendor_id != CPU_VENDOR_AMD && 674 cpu_vendor_id != CPU_VENDOR_HYGON) 675 return (0); 676 version = lapic_read32(LAPIC_VERSION); 677 if ((version & APIC_VER_AMD_EXT_SPACE) != 0) 678 return (lapic_read32(LAPIC_EXT_FEATURES)); 679 else 680 return (0); 681 } 682 683 static inline uint32_t 684 amd_read_elvt_count(void) 685 { 686 uint32_t extf; 687 uint32_t count; 688 689 extf = amd_read_ext_features(); 690 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 691 count = min(count, APIC_ELVT_MAX + 1); 692 return (count); 693 } 694 
/*
 * Dump contents of local APIC registers
 */
static void
native_lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID), version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf(" AMD ext features: 0x%08x\n", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf(" AMD elvt%d: 0x%08x\n", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
	}
}

/* Re-enter x2APIC mode (if selected) with interrupts disabled. */
static void
native_lapic_xapic_mode(void)
{
	register_t saveintr;

	saveintr = intr_disable();
	if (x2apic_mode)
		native_lapic_enable_x2apic();
	intr_restore(saveintr);
}

/*
 * Program this CPU's local APIC: TPR, spurious vector, LVT entries and
 * the local timer.  Timer calibration is performed only on the BSP at
 * boot ('boot' != 0).
 */
static void
native_lapic_setup(int boot)
{
	struct lapic *la;
	uint32_t version;
	uint32_t maxlvt;
	register_t saveintr;
	int elvt_count;
	int i;

	saveintr = intr_disable();

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries. */
	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
	    lapic_read32(LAPIC_LVT_LINT0)));
	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
	    lapic_read32(LAPIC_LVT_LINT1)));

	/*
	 * Program the PMC LVT entry if present.
	 *
	 * NOTE(review): unlike the other entries, lvt_mode() is passed the
	 * register offset (LAPIC_LVT_PCINT) rather than the current register
	 * contents here, so no current-register bits are preserved; confirm
	 * against apicreg.h that this is intentional/harmless.
	 */
	if (maxlvt >= APIC_LVT_PMC) {
		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
		    LAPIC_LVT_PCINT));
	}

	/* Program timer LVT. */
	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
	    lapic_read32(LAPIC_LVT_TIMER));
	la->lvt_timer_last = la->lvt_timer_base;
	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

	/* Calibrate the timer parameters using BSP. */
	if (boot && IS_BSP()) {
		lapic_calibrate_initcount(la);
		if (lapic_timer_tsc_deadline)
			lapic_calibrate_deadline(la);
	}

	/* Setup the timer if configured. */
	if (la->la_timer_mode != LAT_MODE_UNDEF) {
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		switch (la->la_timer_mode) {
		case LAT_MODE_PERIODIC:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_periodic(la);
			break;
		case LAT_MODE_ONESHOT:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la);
			break;
		case LAT_MODE_DEADLINE:
			/* TSC-deadline mode does not use the divisor. */
			lapic_timer_deadline(la);
			break;
		default:
			panic("corrupted la_timer_mode %p %d", la,
			    la->la_timer_mode);
		}
	}

	/* Program error LVT and clear any existing errors. */
	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
	    lapic_read32(LAPIC_LVT_ERROR)));
	lapic_write32(LAPIC_ESR, 0);

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI) {
		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
		    lapic_read32(LAPIC_LVT_CMCI)));
	}

	/* Program any active AMD extended LVT entries. */
	elvt_count = amd_read_elvt_count();
	for (i = 0; i < elvt_count; i++) {
		if (la->la_elvts[i].lvt_active)
			lapic_write32(LAPIC_EXT_LVT0 + i,
			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
	}

	intr_restore(saveintr);
}

/*
 * Register a per-CPU interrupt counter for the local APIC timer on
 * every present CPU.
 */
static void
native_lapic_intrcnt(void *dummy __unused)
{
	struct pcpu *pc;
	struct lapic *la;
	char buf[MAXCOMLEN + 1];

	/* If there are no APICs, skip this function. */
	if (lapics == NULL)
		return;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		la = &lapics[pc->pc_apic_id];
		if (!la->la_present)
			continue;

		snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
		intrcnt_add(buf, &la->la_timer_count);
	}
}
SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
    NULL);

/* Unmask the PMC LVT entry on the current CPU. */
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t value;

	value = lapic_read32(LAPIC_LVT_PCINT);
	value &= ~APIC_LVT_M;
	lapic_write32(LAPIC_LVT_PCINT, value);
#endif
}

#ifdef HWPMC_HOOKS
/* Rendezvous callback: reprogram the PMC LVT on the current CPU. */
static void
lapic_update_pmc(void *dummy)
{
	struct lapic *la;

	la = &lapics[lapic_id()];
	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
	    lapic_read32(LAPIC_LVT_PCINT)));
}
#endif

/*
 * Unmask the PMC LVT on all CPUs.  Returns 1 on success, 0 when the
 * local APIC or the PMC LVT entry is not available.
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);

	/* Fail if the PMC LVT is not present.
 */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask the PMC entry in the global defaults. */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}

/* Mask the PMC LVT on all CPUs (hwpmc unload path). */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}

/*
 * Measure the local APIC timer frequency (count_freq): run the timer
 * for one second, doubling the divisor until the count stops saturating.
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer. */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		/* A non-saturated count means the divisor is big enough. */
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

static void
lapic_calibrate_deadline(struct lapic *la __unused)
{

	/* TSC-deadline mode ticks at the TSC frequency; nothing to measure. */
	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch the per-CPU timer to 'newmode', updating the event timer's
 * reported frequency and period bounds to match.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	/* Period bounds are sbintime values (32.32 fixed point). */
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * Event timer start method: program periodic mode when 'period' is set,
 * otherwise TSC-deadline or one-shot mode for a single event at 'first'.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* Event timer stop method: halt the timer and forget the mode. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/* Return this CPU's local APIC ID. */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	/* In xAPIC mode the ID occupies the top byte of the register. */
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Return non-zero if 'vector' is pending in this CPU's IRR. */
static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
	 */
	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
	return (irr & 1 << (vector % 32));
}

/*
 * Record the logical destination cluster and intra-cluster ID for an
 * APIC in the software per-APIC state.
 */
static void
native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}

/*
 * Mask or unmask an LVT pin.  APIC_ID_ALL updates the global default
 * table; otherwise only the named APIC's per-pin state is changed and
 * marked active.  Returns EINVAL for an out-of-range pin.
 */
static int
native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_masked = masked;
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u %s\n", pin, masked ?
		    "masked" : "unmasked");
	return (0);
}

/*
 * Set the delivery mode of an LVT pin.  Only NMI, SMI, INIT, and
 * ExtINT modes are accepted; any other mode panics.  All accepted
 * modes are forced edge-triggered and active-high, and ExtINT pins
 * start out masked while the others start unmasked.
 */
static int
native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		/* Update the global default for this pin. */
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}

/*
 * Set the polarity of an LVT pin.  INTR_POLARITY_CONFORM is rejected
 * with EINVAL, as is an out-of-range pin.
 */
static int
native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
{

	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		lapics[apic_id].la_lvts[pin].lvt_activehi =
		    (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u polarity: %s\n", pin,
		    pol == INTR_POLARITY_HIGH ? "high" : "low");
	return (0);
}

/*
 * Set the trigger mode (edge or level) of an LVT pin.
 * INTR_TRIGGER_CONFORM is rejected with EINVAL, as is an
 * out-of-range pin.
 */
static int
native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
    enum intr_trigger trigger)
{

	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
		    (trigger == INTR_TRIGGER_EDGE);
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u trigger: %s\n", pin,
		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
	return (0);
}

/*
 * Adjust the TPR of the current CPU so that it blocks all interrupts below
 * the passed in vector.
 */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	lapic_write32(LAPIC_TPR, vector);
#else
	/* Read-modify-write so only the priority field is changed. */
	uint32_t tpr;

	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic_write32(LAPIC_TPR, tpr);
#endif
}

/* Signal end-of-interrupt to the local APIC. */
static void
native_lapic_eoi(void)
{

	lapic_write32_nofence(LAPIC_EOI, 0);
}

/*
 * Handler for vectored I/O interrupts: translate the IDT vector back
 * to its interrupt source and run the registered handlers.
 */
void
lapic_handle_intr(int vector, struct trapframe *frame)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
	    vector));
	intr_execute_handlers(isrc, frame);
}

/*
 * LAPIC timer interrupt handler: account the tick and dispatch the
 * registered event timer callback for this CPU.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing.
	 */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		/*
		 * Publish the interrupted frame for the duration of the
		 * event timer callback, restoring the previous one after.
		 */
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}

/* Program the timer's divide configuration register. */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
	    ("lapic: invalid divisor %u", divisor));
	/* Map the power-of-two divisor to its DCR encoding via ffs(). */
	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
}

/* Arm the timer in one-shot mode with the interrupt unmasked. */
static void
lapic_timer_oneshot(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_ONE_SHOT;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in one-shot mode with the interrupt masked; used by
 * the calibration loop, which polls the current-count register.
 */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, count);
}

/* Arm the timer in periodic mode with period la_timer_period. */
static void
lapic_timer_periodic(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_PERIODIC;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in TSC-deadline mode.  The LVT register is rewritten
 * only when its cached value changes; in xAPIC mode an mfence orders
 * the LVT write before the deadline MSR write.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}

/* Stop the timer: clear the TSC deadline or mask the LVT entry. */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		/* Writing 0 to the deadline MSR disarms the timer. */
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}

/* CMCI handler: acknowledge the interrupt, then run the MCA code. */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}

/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
static void
native_lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	/* With the ATPIC present the LAPIC may not be mapped yet. */
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: CMCI unmasked\n", apic_id);
}

/*
 * Enable the AMD MCE Thresholding extended LVT entry on this CPU.
 * Returns the ELVT index (APIC_ELVT_MCA) on success, or -1 when the
 * CPU does not expose enough extended LVT entries (or, with DEV_ATPIC,
 * when the LAPIC is not mapped).
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	/* Already unmasked (e.g. by firmware): nothing to update. */
	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}

/* Local APIC error interrupt handler: report the ESR contents. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}

/* Translate a local APIC ID to a CPU ID (identity 0 when !SMP). */
static u_int
native_apic_cpuid(u_int apic_id)
{
#ifdef SMP
	return apic_cpuids[apic_id];
#else
	return 0;
#endif
}

/*
 * Request a free IDT vector to be used by the specified IRQ.
 * Returns the allocated vector, or 0 if none is free.
 */
static u_int
native_apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {

		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if ((vector & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/* Found a run, assign IRQs and return the first vector. */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}

/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
static void
native_apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/* Install the PTI-aware handler when page table isolation is on. */
	setidt(vector, (pti ?
ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}

/*
 * Disable a vector for a particular apic_id.  Currently a no-op aside
 * from assertions; see the comment in the #ifdef notyet block below.
 */
static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}

/* Release an APIC vector when it's no longer in use. */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	/* The negative IRQ_* markers are reported to callers as 0. */
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 */
DB_SHOW_COMMAND(apic, db_show_apic)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "/v" and "/vv" modifiers select increasing verbosity. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}

/* Print the set bits of 'v' as hex vector numbers offset by 'base'. */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		if (v & (1 << i)) {
			/* Emit the prefix only once, on the first set bit. */
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}

/* Show info from the lapic regs for this CPU.
 */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR));

/* Dump one 32-bit ISR/TMR/IRR register as a mask of vector numbers. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
 */

static SLIST_HEAD(, apic_enumerator) enumerators =
    SLIST_HEAD_INITIALIZER(enumerators);
static struct apic_enumerator *best_enum;

/*
 * Register an APIC enumerator (e.g. MP Table or ACPI MADT) with the
 * probing code.  With INVARIANTS, double registration panics.
 */
void
apic_register_enumerator(struct apic_enumerator *enumerator)
{
#ifdef INVARIANTS
	struct apic_enumerator *apic_enum;

	SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
		if (apic_enum == enumerator)
			panic("%s: Duplicate register of %s", __func__,
			    enumerator->apic_name);
	}
#endif
	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
}

/*
 * We have to look for CPU's very, very early because certain subsystems
 * want to know how many CPU's we have extremely early on in the boot
 * process.
 */
static void
apic_init(void *dummy __unused)
{
	struct apic_enumerator *enumerator;
	int retval, best;

	/* We only support built in local APICs. */
	if (!(cpu_feature & CPUID_APIC))
		return;

	/* Don't probe if APIC mode is disabled. */
	if (resource_disabled("apic", 0))
		return;

	/* Probe all the enumerators to find the best match. */
	best_enum = NULL;
	best = 0;
	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
		retval = enumerator->apic_probe();
		/* A positive return means the probe failed. */
		if (retval > 0)
			continue;
		/* Among successes (retval <= 0), the greatest value wins. */
		if (best_enum == NULL || best < retval) {
			best_enum = enumerator;
			best = retval;
		}
	}
	if (best_enum == NULL) {
		if (bootverbose)
			printf("APIC: Could not find any APICs.\n");
#ifndef DEV_ATPIC
		panic("running without device atpic requires a local APIC");
#endif
		return;
	}

	if (bootverbose)
		printf("APIC: Using the %s enumerator.\n",
		    best_enum->apic_name);

#ifdef I686_CPU
	/*
	 * To work around an errata, we disable the local APIC on some
	 * CPUs during early startup.  We need to turn the local APIC back
	 * on on such CPUs now.
	 */
	ppro_reenable_apic();
#endif

	/* Probe the CPU's in the system. */
	retval = best_enum->apic_probe_cpus();
	if (retval != 0)
		printf("%s: Failed to probe CPUs: returned %d\n",
		    best_enum->apic_name, retval);

}
SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);

/*
 * Setup the local APIC.  We have to do this prior to starting up the APs
 * in the SMP case.
 */
static void
apic_setup_local(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/* One lapic state slot for every possible APIC ID. */
	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
	    M_WAITOK | M_ZERO);

	/* Initialize the local APIC. */
	retval = best_enum->apic_setup_local();
	if (retval != 0)
		printf("%s: Failed to setup the local APIC: returned %d\n",
		    best_enum->apic_name, retval);
}
SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);

/*
 * Setup the I/O APICs.
 */
static void
apic_setup_io(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/*
	 * Local APIC must be registered before other PICs and pseudo PICs
	 * for proper suspend/resume order.
	 */
	intr_register_pic(&lapic_pic);

	retval = best_enum->apic_setup_io();
	if (retval != 0)
		printf("%s: Failed to setup I/O APICs: returned %d\n",
		    best_enum->apic_name, retval);

	/*
	 * Finish setting up the local APIC on the BSP once we know
	 * how to properly program the LINT pins.  In particular, this
	 * enables the EOI suppression mode, if LAPIC supports it and
	 * user did not disable the mode.
	 */
	lapic_setup(1);
	if (bootverbose)
		lapic_dump("BSP");

	/* Enable the MSI "pic".
	 */
	init_ops.msi_init();

#ifdef XENHVM
	xen_intr_alloc_irqs();
#endif
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);

#ifdef SMP
/*
 * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
 * private to the MD code.  The public interface for the rest of the
 * kernel is defined in mp_machdep.c.
 */

/*
 * Wait delay microseconds for IPI to be sent.  If delay is -1, we
 * wait forever.
 */
static int
native_lapic_ipi_wait(int delay)
{
	uint64_t rx;

	/* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
	if (x2apic_mode)
		return (1);

	/* Poll the delivery-status bit until it reads idle. */
	for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
		if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
		    APIC_DELSTAT_IDLE)
			return (1);
		ia32_pause();
	}
	return (0);
}

/*
 * Low-level IPI dispatch: program the ICR with the given command word
 * and destination.  In xAPIC mode the read-modify-write of the ICR is
 * done with interrupts disabled.
 */
static void
native_lapic_ipi_raw(register_t icrlo, u_int dest)
{
	uint64_t icr;
	uint32_t vhi, vlo;
	register_t saveintr;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	/* Set destination in ICR HI register if it is being used. */
	if (!x2apic_mode) {
		saveintr = intr_disable();
		icr = lapic_read_icr();
	}

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		if (x2apic_mode) {
			vhi = dest;
		} else {
			vhi = icr >> 32;
			vhi &= ~APIC_ID_MASK;
			vhi |= dest << APIC_ID_SHIFT;
		}
	} else {
		vhi = 0;
	}

	/* Program the contents of the IPI and dispatch it.
	 */
	if (x2apic_mode) {
		vlo = icrlo;
	} else {
		/* Keep the previous ICR's reserved bits intact. */
		vlo = icr;
		vlo &= APIC_ICRLO_RESV_MASK;
		vlo |= icrlo;
	}
	lapic_write_icr(vhi, vlo);
	if (!x2apic_mode)
		intr_restore(saveintr);
}

#define	BEFORE_SPIN	50000
#ifdef DETECT_DEADLOCK
#define	AFTER_SPIN	50
#endif

/*
 * Send an IPI for 'vector' to 'dest', which is either one of the
 * APIC_IPI_DEST_* shorthands or an explicit APIC ID.  Waits for any
 * previous IPI to drain before dispatching.
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		icrlo |= APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo |= APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo |= APIC_DEST_ALLESELF;
		break;
	default:
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(BEFORE_SPIN)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}

#endif /* SMP */

/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region.  In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written.  Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */
/*
 * Allocate an IDT vector for a dynamic IPI handler.  Returns the
 * vector number, or -1 if no dynamic IPI slot is free.
 */
static int
native_lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	vector = -1;
	mtx_lock_spin(&icu_lock);
	/* Claim the first dynamic slot still pointing at the rsvd stub. */
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		ip = &idt[idx];
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	return (vector);
}

/*
 * Release a dynamically allocated IPI vector, restoring the reserved
 * stub in its IDT slot.
 */
static void
native_lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
	/* Freeing a slot that still holds the rsvd stub is a bug. */
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}