1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * Copyright (c) 1996, by Steve Passe 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_atpic.h" 40 #include "opt_hwpmc_hooks.h" 41 42 #include "opt_ddb.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bus.h> 47 #include <sys/kernel.h> 48 #include <sys/lock.h> 49 #include <sys/malloc.h> 50 #include <sys/mutex.h> 51 #include <sys/pcpu.h> 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/timeet.h> 57 58 #include <vm/vm.h> 59 #include <vm/pmap.h> 60 61 #include <x86/apicreg.h> 62 #include <machine/clock.h> 63 #include <machine/cpufunc.h> 64 #include <machine/cputypes.h> 65 #include <machine/frame.h> 66 #include <machine/intr_machdep.h> 67 #include <x86/apicvar.h> 68 #include <x86/mca.h> 69 #include <machine/md_var.h> 70 #include <machine/smp.h> 71 #include <machine/specialreg.h> 72 #include <x86/init.h> 73 74 #ifdef DDB 75 #include <sys/interrupt.h> 76 #include <ddb/ddb.h> 77 #endif 78 79 #ifdef __amd64__ 80 #define SDT_APIC SDT_SYSIGT 81 #define GSEL_APIC 0 82 #else 83 #define SDT_APIC SDT_SYS386IGT 84 #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 85 #endif 86 87 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); 88 89 /* Sanity checks on IDT vectors. */ 90 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 91 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 92 CTASSERT(APIC_LOCAL_INTS == 240); 93 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 94 95 /* 96 * I/O interrupts use non-negative IRQ values. These values are used 97 * to mark unused IDT entries or IDT entries reserved for a non-I/O 98 * interrupt. 99 */ 100 #define IRQ_FREE -1 101 #define IRQ_TIMER -2 102 #define IRQ_SYSCALL -3 103 #define IRQ_DTRACE_RET -4 104 #define IRQ_EVTCHN -5 105 106 enum lat_timer_mode { 107 LAT_MODE_UNDEF = 0, 108 LAT_MODE_PERIODIC = 1, 109 LAT_MODE_ONESHOT = 2, 110 LAT_MODE_DEADLINE = 3, 111 }; 112 113 /* 114 * Support for local APICs. 
Local APICs manage interrupts on each 115 * individual processor as opposed to I/O APICs which receive interrupts 116 * from I/O devices and then forward them on to the local APICs. 117 * 118 * Local APICs can also send interrupts to each other thus providing the 119 * mechanism for IPIs. 120 */ 121 122 struct lvt { 123 u_int lvt_edgetrigger:1; 124 u_int lvt_activehi:1; 125 u_int lvt_masked:1; 126 u_int lvt_active:1; 127 u_int lvt_mode:16; 128 u_int lvt_vector:8; 129 }; 130 131 struct lapic { 132 struct lvt la_lvts[APIC_LVT_MAX + 1]; 133 struct lvt la_elvts[APIC_ELVT_MAX + 1];; 134 u_int la_id:8; 135 u_int la_cluster:4; 136 u_int la_cluster_id:2; 137 u_int la_present:1; 138 u_long *la_timer_count; 139 uint64_t la_timer_period; 140 enum lat_timer_mode la_timer_mode; 141 uint32_t lvt_timer_base; 142 uint32_t lvt_timer_last; 143 /* Include IDT_SYSCALL to make indexing easier. */ 144 int la_ioint_irqs[APIC_NUM_IOINTS + 1]; 145 } static *lapics; 146 147 /* Global defaults for local APIC LVT entries. */ 148 static struct lvt lvts[APIC_LVT_MAX + 1] = { 149 { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ 150 { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ 151 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ 152 { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ 153 { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ 154 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ 155 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ 156 }; 157 158 /* Global defaults for AMD local APIC ELVT entries. 
*/ 159 static struct lvt elvts[APIC_ELVT_MAX + 1] = { 160 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 161 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, 162 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 163 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 164 }; 165 166 static inthand_t *ioint_handlers[] = { 167 NULL, /* 0 - 31 */ 168 IDTVEC(apic_isr1), /* 32 - 63 */ 169 IDTVEC(apic_isr2), /* 64 - 95 */ 170 IDTVEC(apic_isr3), /* 96 - 127 */ 171 IDTVEC(apic_isr4), /* 128 - 159 */ 172 IDTVEC(apic_isr5), /* 160 - 191 */ 173 IDTVEC(apic_isr6), /* 192 - 223 */ 174 IDTVEC(apic_isr7), /* 224 - 255 */ 175 }; 176 177 static inthand_t *ioint_pti_handlers[] = { 178 NULL, /* 0 - 31 */ 179 IDTVEC(apic_isr1_pti), /* 32 - 63 */ 180 IDTVEC(apic_isr2_pti), /* 64 - 95 */ 181 IDTVEC(apic_isr3_pti), /* 96 - 127 */ 182 IDTVEC(apic_isr4_pti), /* 128 - 159 */ 183 IDTVEC(apic_isr5_pti), /* 160 - 191 */ 184 IDTVEC(apic_isr6_pti), /* 192 - 223 */ 185 IDTVEC(apic_isr7_pti), /* 224 - 255 */ 186 }; 187 188 static u_int32_t lapic_timer_divisors[] = { 189 APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 190 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 191 }; 192 193 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); 194 195 volatile char *lapic_map; 196 vm_paddr_t lapic_paddr; 197 int x2apic_mode; 198 int lapic_eoi_suppression; 199 static int lapic_timer_tsc_deadline; 200 static u_long lapic_timer_divisor, count_freq; 201 static struct eventtimer lapic_et; 202 #ifdef SMP 203 static uint64_t lapic_ipi_wait_mult; 204 #endif 205 unsigned int max_apic_id; 206 207 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); 208 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); 209 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, 210 &lapic_eoi_suppression, 0, ""); 211 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, 212 &lapic_timer_tsc_deadline, 0, ""); 213 214 static void lapic_calibrate_initcount(struct lapic *la); 215 static void 
lapic_calibrate_deadline(struct lapic *la);

/*
 * Read a 32-bit local APIC register: via the MSR interface when
 * running in x2APIC mode, otherwise via the memory-mapped window at
 * lapic_map.
 */
static uint32_t
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/*
 * Write a 32-bit local APIC register.  In x2APIC mode the write goes
 * through WRMSR and is preceded by mfence+lfence.
 * NOTE(review): the fences presumably serialize prior memory accesses
 * ahead of the non-serializing x2APIC MSR write -- confirm against the
 * Intel SDM x2APIC chapter.  Callers that do not need this ordering use
 * lapic_write32_nofence() below.
 */
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

/* As lapic_write32(), but without the fences in x2APIC mode. */
static void
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
/*
 * Read the full 64-bit interrupt command register.  x2APIC exposes it
 * as a single MSR; xAPIC requires reading the two 32-bit halves.
 */
static uint64_t
lapic_read_icr(void)
{
	uint64_t v;
	uint32_t vhi, vlo;

	if (x2apic_mode) {
		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
	} else {
		vhi = lapic_read32(LAPIC_ICR_HI);
		vlo = lapic_read32(LAPIC_ICR_LO);
		v = ((uint64_t)vhi << 32) | vlo;
	}
	return (v);
}

/* Read only the low half of the ICR (delivery status lives here). */
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write the ICR.  In x2APIC mode both halves are written with a single
 * fenced WRMSR; in xAPIC mode the high half (destination) must be
 * written before the low half, since the low write triggers delivery.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
#endif /* SMP */

/* Turn on x2APIC mode (and the APIC itself) via IA32_APIC_BASE. */
static void
native_lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* Report whether the APIC is currently enabled in x2APIC mode. */
static bool
native_lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC |
APICBASE_ENABLED)) == 311 (APICBASE_X2APIC | APICBASE_ENABLED)); 312 } 313 314 static void lapic_enable(void); 315 static void lapic_resume(struct pic *pic, bool suspend_cancelled); 316 static void lapic_timer_oneshot(struct lapic *); 317 static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); 318 static void lapic_timer_periodic(struct lapic *); 319 static void lapic_timer_deadline(struct lapic *); 320 static void lapic_timer_stop(struct lapic *); 321 static void lapic_timer_set_divisor(u_int divisor); 322 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); 323 static int lapic_et_start(struct eventtimer *et, 324 sbintime_t first, sbintime_t period); 325 static int lapic_et_stop(struct eventtimer *et); 326 static u_int apic_idt_to_irq(u_int apic_id, u_int vector); 327 static void lapic_set_tpr(u_int vector); 328 329 struct pic lapic_pic = { .pic_resume = lapic_resume }; 330 331 /* Forward declarations for apic_ops */ 332 static void native_lapic_create(u_int apic_id, int boot_cpu); 333 static void native_lapic_init(vm_paddr_t addr); 334 static void native_lapic_xapic_mode(void); 335 static void native_lapic_setup(int boot); 336 static void native_lapic_dump(const char *str); 337 static void native_lapic_disable(void); 338 static void native_lapic_eoi(void); 339 static int native_lapic_id(void); 340 static int native_lapic_intr_pending(u_int vector); 341 static u_int native_apic_cpuid(u_int apic_id); 342 static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); 343 static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, 344 u_int count, u_int align); 345 static void native_apic_disable_vector(u_int apic_id, u_int vector); 346 static void native_apic_enable_vector(u_int apic_id, u_int vector); 347 static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); 348 static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, 349 u_int cluster_id); 350 static int native_lapic_enable_pmc(void); 351 
static void native_lapic_disable_pmc(void); 352 static void native_lapic_reenable_pmc(void); 353 static void native_lapic_enable_cmc(void); 354 static int native_lapic_enable_mca_elvt(void); 355 static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, 356 u_char masked); 357 static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, 358 uint32_t mode); 359 static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, 360 enum intr_polarity pol); 361 static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, 362 enum intr_trigger trigger); 363 #ifdef SMP 364 static void native_lapic_ipi_raw(register_t icrlo, u_int dest); 365 static void native_lapic_ipi_vectored(u_int vector, int dest); 366 static int native_lapic_ipi_wait(int delay); 367 #endif /* SMP */ 368 static int native_lapic_ipi_alloc(inthand_t *ipifunc); 369 static void native_lapic_ipi_free(int vector); 370 371 struct apic_ops apic_ops = { 372 .create = native_lapic_create, 373 .init = native_lapic_init, 374 .xapic_mode = native_lapic_xapic_mode, 375 .is_x2apic = native_lapic_is_x2apic, 376 .setup = native_lapic_setup, 377 .dump = native_lapic_dump, 378 .disable = native_lapic_disable, 379 .eoi = native_lapic_eoi, 380 .id = native_lapic_id, 381 .intr_pending = native_lapic_intr_pending, 382 .set_logical_id = native_lapic_set_logical_id, 383 .cpuid = native_apic_cpuid, 384 .alloc_vector = native_apic_alloc_vector, 385 .alloc_vectors = native_apic_alloc_vectors, 386 .enable_vector = native_apic_enable_vector, 387 .disable_vector = native_apic_disable_vector, 388 .free_vector = native_apic_free_vector, 389 .enable_pmc = native_lapic_enable_pmc, 390 .disable_pmc = native_lapic_disable_pmc, 391 .reenable_pmc = native_lapic_reenable_pmc, 392 .enable_cmc = native_lapic_enable_cmc, 393 .enable_mca_elvt = native_lapic_enable_mca_elvt, 394 #ifdef SMP 395 .ipi_raw = native_lapic_ipi_raw, 396 .ipi_vectored = native_lapic_ipi_vectored, 397 .ipi_wait = native_lapic_ipi_wait, 398 #endif 399 
.ipi_alloc = native_lapic_ipi_alloc, 400 .ipi_free = native_lapic_ipi_free, 401 .set_lvt_mask = native_lapic_set_lvt_mask, 402 .set_lvt_mode = native_lapic_set_lvt_mode, 403 .set_lvt_polarity = native_lapic_set_lvt_polarity, 404 .set_lvt_triggermode = native_lapic_set_lvt_triggermode, 405 }; 406 407 static uint32_t 408 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) 409 { 410 411 value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | 412 APIC_LVT_VECTOR); 413 if (lvt->lvt_edgetrigger == 0) 414 value |= APIC_LVT_TM; 415 if (lvt->lvt_activehi == 0) 416 value |= APIC_LVT_IIPP_INTALO; 417 if (lvt->lvt_masked) 418 value |= APIC_LVT_M; 419 value |= lvt->lvt_mode; 420 switch (lvt->lvt_mode) { 421 case APIC_LVT_DM_NMI: 422 case APIC_LVT_DM_SMI: 423 case APIC_LVT_DM_INIT: 424 case APIC_LVT_DM_EXTINT: 425 if (!lvt->lvt_edgetrigger && bootverbose) { 426 printf("lapic%u: Forcing LINT%u to edge trigger\n", 427 la->la_id, pin); 428 value &= ~APIC_LVT_TM; 429 } 430 /* Use a vector of 0. 
*/ 431 break; 432 case APIC_LVT_DM_FIXED: 433 value |= lvt->lvt_vector; 434 break; 435 default: 436 panic("bad APIC LVT delivery mode: %#x\n", value); 437 } 438 return (value); 439 } 440 441 static uint32_t 442 lvt_mode(struct lapic *la, u_int pin, uint32_t value) 443 { 444 struct lvt *lvt; 445 446 KASSERT(pin <= APIC_LVT_MAX, 447 ("%s: pin %u out of range", __func__, pin)); 448 if (la->la_lvts[pin].lvt_active) 449 lvt = &la->la_lvts[pin]; 450 else 451 lvt = &lvts[pin]; 452 453 return (lvt_mode_impl(la, lvt, pin, value)); 454 } 455 456 static uint32_t 457 elvt_mode(struct lapic *la, u_int idx, uint32_t value) 458 { 459 struct lvt *elvt; 460 461 KASSERT(idx <= APIC_ELVT_MAX, 462 ("%s: idx %u out of range", __func__, idx)); 463 464 elvt = &la->la_elvts[idx]; 465 KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); 466 KASSERT(elvt->lvt_edgetrigger, 467 ("%s: ELVT%u is not edge triggered", __func__, idx)); 468 KASSERT(elvt->lvt_activehi, 469 ("%s: ELVT%u is not active high", __func__, idx)); 470 return (lvt_mode_impl(la, elvt, idx, value)); 471 } 472 473 /* 474 * Map the local APIC and setup necessary interrupt vectors. 475 */ 476 static void 477 native_lapic_init(vm_paddr_t addr) 478 { 479 #ifdef SMP 480 uint64_t r, r1, r2, rx; 481 #endif 482 uint32_t ver; 483 u_int regs[4]; 484 int i, arat; 485 486 /* 487 * Enable x2APIC mode if possible. Map the local APIC 488 * registers page. 489 * 490 * Keep the LAPIC registers page mapped uncached for x2APIC 491 * mode too, to have direct map page attribute set to 492 * uncached. This is needed to work around CPU errata present 493 * on all Intel processors. 494 */ 495 KASSERT(trunc_page(addr) == addr, 496 ("local APIC not aligned on a page boundary")); 497 lapic_paddr = addr; 498 lapic_map = pmap_mapdev(addr, PAGE_SIZE); 499 if (x2apic_mode) { 500 native_lapic_enable_x2apic(); 501 lapic_map = NULL; 502 } 503 504 /* Setup the spurious interrupt handler. 
*/ 505 setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, 506 GSEL_APIC); 507 508 /* Perform basic initialization of the BSP's local APIC. */ 509 lapic_enable(); 510 511 /* Set BSP's per-CPU local APIC ID. */ 512 PCPU_SET(apic_id, lapic_id()); 513 514 /* Local APIC timer interrupt. */ 515 setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), 516 SDT_APIC, SEL_KPL, GSEL_APIC); 517 518 /* Local APIC error interrupt. */ 519 setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), 520 SDT_APIC, SEL_KPL, GSEL_APIC); 521 522 /* XXX: Thermal interrupt */ 523 524 /* Local APIC CMCI. */ 525 setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), 526 SDT_APIC, SEL_KPL, GSEL_APIC); 527 528 if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { 529 arat = 0; 530 /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ 531 if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { 532 do_cpuid(0x06, regs); 533 if ((regs[0] & CPUTPM1_ARAT) != 0) 534 arat = 1; 535 } else if (cpu_vendor_id == CPU_VENDOR_AMD && 536 CPUID_TO_FAMILY(cpu_id) >= 0x12) { 537 arat = 1; 538 } 539 bzero(&lapic_et, sizeof(lapic_et)); 540 lapic_et.et_name = "LAPIC"; 541 lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | 542 ET_FLAGS_PERCPU; 543 lapic_et.et_quality = 600; 544 if (!arat) { 545 lapic_et.et_flags |= ET_FLAGS_C3STOP; 546 lapic_et.et_quality = 100; 547 } 548 if ((cpu_feature & CPUID_TSC) != 0 && 549 (cpu_feature2 & CPUID2_TSCDLT) != 0 && 550 tsc_is_invariant && tsc_freq != 0) { 551 lapic_timer_tsc_deadline = 1; 552 TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", 553 &lapic_timer_tsc_deadline); 554 } 555 556 lapic_et.et_frequency = 0; 557 /* We don't know frequency yet, so trying to guess. 
*/ 558 lapic_et.et_min_period = 0x00001000LL; 559 lapic_et.et_max_period = SBT_1S; 560 lapic_et.et_start = lapic_et_start; 561 lapic_et.et_stop = lapic_et_stop; 562 lapic_et.et_priv = NULL; 563 et_register(&lapic_et); 564 } 565 566 /* 567 * Set lapic_eoi_suppression after lapic_enable(), to not 568 * enable suppression in the hardware prematurely. Note that 569 * we by default enable suppression even when system only has 570 * one IO-APIC, since EOI is broadcasted to all APIC agents, 571 * including CPUs, otherwise. 572 * 573 * It seems that at least some KVM versions report 574 * EOI_SUPPRESSION bit, but auto-EOI does not work. 575 */ 576 ver = lapic_read32(LAPIC_VERSION); 577 if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { 578 lapic_eoi_suppression = 1; 579 if (vm_guest == VM_GUEST_KVM) { 580 if (bootverbose) 581 printf( 582 "KVM -- disabling lapic eoi suppression\n"); 583 lapic_eoi_suppression = 0; 584 } 585 TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", 586 &lapic_eoi_suppression); 587 } 588 589 #ifdef SMP 590 #define LOOPS 100000 591 /* 592 * Calibrate the busy loop waiting for IPI ack in xAPIC mode. 593 * lapic_ipi_wait_mult contains the number of iterations which 594 * approximately delay execution for 1 microsecond (the 595 * argument to native_lapic_ipi_wait() is in microseconds). 596 * 597 * We assume that TSC is present and already measured. 598 * Possible TSC frequency jumps are irrelevant to the 599 * calibration loop below, the CPU clock management code is 600 * not yet started, and we do not enter sleep states. 601 */ 602 KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, 603 ("TSC not initialized")); 604 if (!x2apic_mode) { 605 r = rdtsc(); 606 for (rx = 0; rx < LOOPS; rx++) { 607 (void)lapic_read_icr_lo(); 608 ia32_pause(); 609 } 610 r = rdtsc() - r; 611 r1 = tsc_freq * LOOPS; 612 r2 = r * 1000000; 613 lapic_ipi_wait_mult = r1 >= r2 ? 
r1 / r2 : 1; 614 if (bootverbose) { 615 printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " 616 "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, 617 (uintmax_t)r, (uintmax_t)tsc_freq); 618 } 619 } 620 #undef LOOPS 621 #endif /* SMP */ 622 } 623 624 /* 625 * Create a local APIC instance. 626 */ 627 static void 628 native_lapic_create(u_int apic_id, int boot_cpu) 629 { 630 int i; 631 632 if (apic_id > max_apic_id) { 633 printf("APIC: Ignoring local APIC with ID %d\n", apic_id); 634 if (boot_cpu) 635 panic("Can't ignore BSP"); 636 return; 637 } 638 KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", 639 apic_id)); 640 641 /* 642 * Assume no local LVT overrides and a cluster of 0 and 643 * intra-cluster ID of 0. 644 */ 645 lapics[apic_id].la_present = 1; 646 lapics[apic_id].la_id = apic_id; 647 for (i = 0; i <= APIC_LVT_MAX; i++) { 648 lapics[apic_id].la_lvts[i] = lvts[i]; 649 lapics[apic_id].la_lvts[i].lvt_active = 0; 650 } 651 for (i = 0; i <= APIC_ELVT_MAX; i++) { 652 lapics[apic_id].la_elvts[i] = elvts[i]; 653 lapics[apic_id].la_elvts[i].lvt_active = 0; 654 } 655 for (i = 0; i <= APIC_NUM_IOINTS; i++) 656 lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; 657 lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; 658 lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = 659 IRQ_TIMER; 660 #ifdef KDTRACE_HOOKS 661 lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = 662 IRQ_DTRACE_RET; 663 #endif 664 #ifdef XENHVM 665 lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; 666 #endif 667 668 669 #ifdef SMP 670 cpu_add(apic_id, boot_cpu); 671 #endif 672 } 673 674 static inline uint32_t 675 amd_read_ext_features(void) 676 { 677 uint32_t version; 678 679 if (cpu_vendor_id != CPU_VENDOR_AMD) 680 return (0); 681 version = lapic_read32(LAPIC_VERSION); 682 if ((version & APIC_VER_AMD_EXT_SPACE) != 0) 683 return (lapic_read32(LAPIC_EXT_FEATURES)); 684 else 685 return (0); 686 } 687 688 static inline uint32_t 689 
amd_read_elvt_count(void) 690 { 691 uint32_t extf; 692 uint32_t count; 693 694 extf = amd_read_ext_features(); 695 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 696 count = min(count, APIC_ELVT_MAX + 1); 697 return (count); 698 } 699 700 /* 701 * Dump contents of local APIC registers 702 */ 703 static void 704 native_lapic_dump(const char* str) 705 { 706 uint32_t version; 707 uint32_t maxlvt; 708 uint32_t extf; 709 int elvt_count; 710 int i; 711 712 version = lapic_read32(LAPIC_VERSION); 713 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 714 printf("cpu%d %s:\n", PCPU_GET(cpuid), str); 715 printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", 716 lapic_read32(LAPIC_ID), version, 717 lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); 718 if ((cpu_feature2 & CPUID2_X2APIC) != 0) 719 printf(" x2APIC: %d", x2apic_mode); 720 printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 721 lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), 722 lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); 723 printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", 724 lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), 725 lapic_read32(LAPIC_LVT_ERROR)); 726 if (maxlvt >= APIC_LVT_PMC) 727 printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); 728 printf("\n"); 729 if (maxlvt >= APIC_LVT_CMCI) 730 printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); 731 extf = amd_read_ext_features(); 732 if (extf != 0) { 733 printf(" AMD ext features: 0x%08x\n", extf); 734 elvt_count = amd_read_elvt_count(); 735 for (i = 0; i < elvt_count; i++) 736 printf(" AMD elvt%d: 0x%08x\n", i, 737 lapic_read32(LAPIC_EXT_LVT0 + i)); 738 } 739 } 740 741 static void 742 native_lapic_xapic_mode(void) 743 { 744 register_t saveintr; 745 746 saveintr = intr_disable(); 747 if (x2apic_mode) 748 native_lapic_enable_x2apic(); 749 intr_restore(saveintr); 750 } 751 752 static void 753 native_lapic_setup(int boot) 754 { 755 struct lapic *la; 756 
uint32_t version; 757 uint32_t maxlvt; 758 register_t saveintr; 759 int elvt_count; 760 int i; 761 762 saveintr = intr_disable(); 763 764 la = &lapics[lapic_id()]; 765 KASSERT(la->la_present, ("missing APIC structure")); 766 version = lapic_read32(LAPIC_VERSION); 767 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 768 769 /* Initialize the TPR to allow all interrupts. */ 770 lapic_set_tpr(0); 771 772 /* Setup spurious vector and enable the local APIC. */ 773 lapic_enable(); 774 775 /* Program LINT[01] LVT entries. */ 776 lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, 777 lapic_read32(LAPIC_LVT_LINT0))); 778 lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, 779 lapic_read32(LAPIC_LVT_LINT1))); 780 781 /* Program the PMC LVT entry if present. */ 782 if (maxlvt >= APIC_LVT_PMC) { 783 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 784 LAPIC_LVT_PCINT)); 785 } 786 787 /* Program timer LVT. */ 788 la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, 789 lapic_read32(LAPIC_LVT_TIMER)); 790 la->lvt_timer_last = la->lvt_timer_base; 791 lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); 792 793 /* Calibrate the timer parameters using BSP. */ 794 if (boot && IS_BSP()) { 795 lapic_calibrate_initcount(la); 796 if (lapic_timer_tsc_deadline) 797 lapic_calibrate_deadline(la); 798 } 799 800 /* Setup the timer if configured. */ 801 if (la->la_timer_mode != LAT_MODE_UNDEF) { 802 KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", 803 lapic_id())); 804 switch (la->la_timer_mode) { 805 case LAT_MODE_PERIODIC: 806 lapic_timer_set_divisor(lapic_timer_divisor); 807 lapic_timer_periodic(la); 808 break; 809 case LAT_MODE_ONESHOT: 810 lapic_timer_set_divisor(lapic_timer_divisor); 811 lapic_timer_oneshot(la); 812 break; 813 case LAT_MODE_DEADLINE: 814 lapic_timer_deadline(la); 815 break; 816 default: 817 panic("corrupted la_timer_mode %p %d", la, 818 la->la_timer_mode); 819 } 820 } 821 822 /* Program error LVT and clear any existing errors. 
*/ 823 lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, 824 lapic_read32(LAPIC_LVT_ERROR))); 825 lapic_write32(LAPIC_ESR, 0); 826 827 /* XXX: Thermal LVT */ 828 829 /* Program the CMCI LVT entry if present. */ 830 if (maxlvt >= APIC_LVT_CMCI) { 831 lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, 832 lapic_read32(LAPIC_LVT_CMCI))); 833 } 834 835 elvt_count = amd_read_elvt_count(); 836 for (i = 0; i < elvt_count; i++) { 837 if (la->la_elvts[i].lvt_active) 838 lapic_write32(LAPIC_EXT_LVT0 + i, 839 elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); 840 } 841 842 intr_restore(saveintr); 843 } 844 845 static void 846 native_lapic_intrcnt(void *dummy __unused) 847 { 848 struct pcpu *pc; 849 struct lapic *la; 850 char buf[MAXCOMLEN + 1]; 851 852 /* If there are no APICs, skip this function. */ 853 if (lapics == NULL) 854 return; 855 856 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 857 la = &lapics[pc->pc_apic_id]; 858 KASSERT(la->la_present, ("missing APIC structure")); 859 860 snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); 861 intrcnt_add(buf, &la->la_timer_count); 862 } 863 } 864 SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, 865 NULL); 866 867 static void 868 native_lapic_reenable_pmc(void) 869 { 870 #ifdef HWPMC_HOOKS 871 uint32_t value; 872 873 value = lapic_read32(LAPIC_LVT_PCINT); 874 value &= ~APIC_LVT_M; 875 lapic_write32(LAPIC_LVT_PCINT, value); 876 #endif 877 } 878 879 #ifdef HWPMC_HOOKS 880 static void 881 lapic_update_pmc(void *dummy) 882 { 883 struct lapic *la; 884 885 la = &lapics[lapic_id()]; 886 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 887 lapic_read32(LAPIC_LVT_PCINT))); 888 } 889 #endif 890 891 static int 892 native_lapic_enable_pmc(void) 893 { 894 #ifdef HWPMC_HOOKS 895 u_int32_t maxlvt; 896 897 /* Fail if the local APIC is not present. */ 898 if (!x2apic_mode && lapic_map == NULL) 899 return (0); 900 901 /* Fail if the PMC LVT is not present. 
 */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask the PMC LVT in the global defaults ... */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

	/* ... and push the change out to every CPU's local APIC. */
#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	/* Without HWPMC_HOOKS this is a no-op that reports failure. */
	return (0);
#endif
}

/*
 * Mask the PMC LVT entry on all CPUs; called when hwpmc releases the
 * performance counter interrupt.
 */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}

/*
 * Measure the frequency of the LAPIC timer's input clock by counting
 * how far it runs in a fixed DELAY() interval, growing the divisor
 * until the count fits in the 32-bit counter.  Results are stored in
 * lapic_timer_divisor and count_freq.  BSP only, at boot.
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer.
 */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		/* Arm a one-shot countdown from the maximum value ... */
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		/* ... and see how far it got in ~1 second. */
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		/* If the counter did not wrap, this divisor is usable. */
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

/*
 * TSC-deadline mode needs no calibration of its own (it runs off
 * tsc_freq); just announce the mode when verbose.
 */
static void
lapic_calibrate_deadline(struct lapic *la __unused)
{

	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch the per-CPU timer into the requested mode, updating the
 * eventtimer's frequency and min/max period to match the new clock
 * source (LAPIC counter vs. TSC).  No-op if already in that mode.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	/* Periods are converted from sbintime_t (32.32 fixed point). */
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * eventtimer start method: program the local timer for periodic
 * operation when period != 0, otherwise one-shot via TSC deadline if
 * available, else via the LAPIC counter.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period =
		    (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* eventtimer stop method: halt the timer and forget the mode. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

/*
 * Software-enable the local APIC by programming the spurious-interrupt
 * vector register, optionally enabling EOI broadcast suppression.
 */
static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/*
 * Return this CPU's local APIC ID.  In xAPIC mode the ID occupies the
 * top byte of the ID register and must be shifted down; in x2APIC mode
 * the register holds the ID directly.
 */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Test whether the given vector is pending in this CPU's IRR. */
static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
1109 */ 1110 irr = lapic_read32(LAPIC_IRR0 + vector / 32); 1111 return (irr & 1 << (vector % 32)); 1112 } 1113 1114 static void 1115 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) 1116 { 1117 struct lapic *la; 1118 1119 KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", 1120 __func__, apic_id)); 1121 KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", 1122 __func__, cluster)); 1123 KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, 1124 ("%s: intra cluster id %u too big", __func__, cluster_id)); 1125 la = &lapics[apic_id]; 1126 la->la_cluster = cluster; 1127 la->la_cluster_id = cluster_id; 1128 } 1129 1130 static int 1131 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) 1132 { 1133 1134 if (pin > APIC_LVT_MAX) 1135 return (EINVAL); 1136 if (apic_id == APIC_ID_ALL) { 1137 lvts[pin].lvt_masked = masked; 1138 if (bootverbose) 1139 printf("lapic:"); 1140 } else { 1141 KASSERT(lapics[apic_id].la_present, 1142 ("%s: missing APIC %u", __func__, apic_id)); 1143 lapics[apic_id].la_lvts[pin].lvt_masked = masked; 1144 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1145 if (bootverbose) 1146 printf("lapic%u:", apic_id); 1147 } 1148 if (bootverbose) 1149 printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); 1150 return (0); 1151 } 1152 1153 static int 1154 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1155 { 1156 struct lvt *lvt; 1157 1158 if (pin > APIC_LVT_MAX) 1159 return (EINVAL); 1160 if (apic_id == APIC_ID_ALL) { 1161 lvt = &lvts[pin]; 1162 if (bootverbose) 1163 printf("lapic:"); 1164 } else { 1165 KASSERT(lapics[apic_id].la_present, 1166 ("%s: missing APIC %u", __func__, apic_id)); 1167 lvt = &lapics[apic_id].la_lvts[pin]; 1168 lvt->lvt_active = 1; 1169 if (bootverbose) 1170 printf("lapic%u:", apic_id); 1171 } 1172 lvt->lvt_mode = mode; 1173 switch (mode) { 1174 case APIC_LVT_DM_NMI: 1175 case APIC_LVT_DM_SMI: 1176 case APIC_LVT_DM_INIT: 1177 case APIC_LVT_DM_EXTINT: 1178 lvt->lvt_edgetrigger = 1; 1179 lvt->lvt_activehi = 1; 1180 if (mode == APIC_LVT_DM_EXTINT) 1181 lvt->lvt_masked = 1; 1182 else 1183 lvt->lvt_masked = 0; 1184 break; 1185 default: 1186 panic("Unsupported delivery mode: 0x%x\n", mode); 1187 } 1188 if (bootverbose) { 1189 printf(" Routing "); 1190 switch (mode) { 1191 case APIC_LVT_DM_NMI: 1192 printf("NMI"); 1193 break; 1194 case APIC_LVT_DM_SMI: 1195 printf("SMI"); 1196 break; 1197 case APIC_LVT_DM_INIT: 1198 printf("INIT"); 1199 break; 1200 case APIC_LVT_DM_EXTINT: 1201 printf("ExtINT"); 1202 break; 1203 } 1204 printf(" -> LINT%u\n", pin); 1205 } 1206 return (0); 1207 } 1208 1209 static int 1210 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1211 { 1212 1213 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1214 return (EINVAL); 1215 if (apic_id == APIC_ID_ALL) { 1216 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1217 if (bootverbose) 1218 printf("lapic:"); 1219 } else { 1220 KASSERT(lapics[apic_id].la_present, 1221 ("%s: missing APIC %u", __func__, apic_id)); 1222 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1223 lapics[apic_id].la_lvts[pin].lvt_activehi = 1224 (pol == INTR_POLARITY_HIGH); 1225 if (bootverbose) 1226 printf("lapic%u:", apic_id); 1227 } 
1228 if (bootverbose) 1229 printf(" LINT%u polarity: %s\n", pin, 1230 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1231 return (0); 1232 } 1233 1234 static int 1235 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1236 enum intr_trigger trigger) 1237 { 1238 1239 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1240 return (EINVAL); 1241 if (apic_id == APIC_ID_ALL) { 1242 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1243 if (bootverbose) 1244 printf("lapic:"); 1245 } else { 1246 KASSERT(lapics[apic_id].la_present, 1247 ("%s: missing APIC %u", __func__, apic_id)); 1248 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1249 (trigger == INTR_TRIGGER_EDGE); 1250 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1251 if (bootverbose) 1252 printf("lapic%u:", apic_id); 1253 } 1254 if (bootverbose) 1255 printf(" LINT%u trigger: %s\n", pin, 1256 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1257 return (0); 1258 } 1259 1260 /* 1261 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1262 * the passed in vector. 1263 */ 1264 static void 1265 lapic_set_tpr(u_int vector) 1266 { 1267 #ifdef CHEAP_TPR 1268 lapic_write32(LAPIC_TPR, vector); 1269 #else 1270 uint32_t tpr; 1271 1272 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1273 tpr |= vector; 1274 lapic_write32(LAPIC_TPR, tpr); 1275 #endif 1276 } 1277 1278 static void 1279 native_lapic_eoi(void) 1280 { 1281 1282 lapic_write32_nofence(LAPIC_EOI, 0); 1283 } 1284 1285 void 1286 lapic_handle_intr(int vector, struct trapframe *frame) 1287 { 1288 struct intsrc *isrc; 1289 1290 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1291 vector)); 1292 intr_execute_handlers(isrc, frame); 1293 } 1294 1295 void 1296 lapic_handle_timer(struct trapframe *frame) 1297 { 1298 struct lapic *la; 1299 struct trapframe *oldframe; 1300 struct thread *td; 1301 1302 /* Send EOI first thing. 
*/ 1303 lapic_eoi(); 1304 1305 #if defined(SMP) && !defined(SCHED_ULE) 1306 /* 1307 * Don't do any accounting for the disabled HTT cores, since it 1308 * will provide misleading numbers for the userland. 1309 * 1310 * No locking is necessary here, since even if we lose the race 1311 * when hlt_cpus_mask changes it is not a big deal, really. 1312 * 1313 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask 1314 * and unlike other schedulers it actually schedules threads to 1315 * those CPUs. 1316 */ 1317 if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) 1318 return; 1319 #endif 1320 1321 /* Look up our local APIC structure for the tick counters. */ 1322 la = &lapics[PCPU_GET(apic_id)]; 1323 (*la->la_timer_count)++; 1324 critical_enter(); 1325 if (lapic_et.et_active) { 1326 td = curthread; 1327 td->td_intr_nesting_level++; 1328 oldframe = td->td_intr_frame; 1329 td->td_intr_frame = frame; 1330 lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); 1331 td->td_intr_frame = oldframe; 1332 td->td_intr_nesting_level--; 1333 } 1334 critical_exit(); 1335 } 1336 1337 static void 1338 lapic_timer_set_divisor(u_int divisor) 1339 { 1340 1341 KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); 1342 KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), 1343 ("lapic: invalid divisor %u", divisor)); 1344 lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); 1345 } 1346 1347 static void 1348 lapic_timer_oneshot(struct lapic *la) 1349 { 1350 uint32_t value; 1351 1352 value = la->lvt_timer_base; 1353 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1354 value |= APIC_LVTT_TM_ONE_SHOT; 1355 la->lvt_timer_last = value; 1356 lapic_write32(LAPIC_LVT_TIMER, value); 1357 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1358 } 1359 1360 static void 1361 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) 1362 { 1363 uint32_t value; 1364 1365 value = la->lvt_timer_base; 1366 value &= ~APIC_LVTT_TM; 1367 value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; 
1368 la->lvt_timer_last = value; 1369 lapic_write32(LAPIC_LVT_TIMER, value); 1370 lapic_write32(LAPIC_ICR_TIMER, count); 1371 } 1372 1373 static void 1374 lapic_timer_periodic(struct lapic *la) 1375 { 1376 uint32_t value; 1377 1378 value = la->lvt_timer_base; 1379 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1380 value |= APIC_LVTT_TM_PERIODIC; 1381 la->lvt_timer_last = value; 1382 lapic_write32(LAPIC_LVT_TIMER, value); 1383 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1384 } 1385 1386 static void 1387 lapic_timer_deadline(struct lapic *la) 1388 { 1389 uint32_t value; 1390 1391 value = la->lvt_timer_base; 1392 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1393 value |= APIC_LVTT_TM_TSCDLT; 1394 if (value != la->lvt_timer_last) { 1395 la->lvt_timer_last = value; 1396 lapic_write32_nofence(LAPIC_LVT_TIMER, value); 1397 if (!x2apic_mode) 1398 mfence(); 1399 } 1400 wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); 1401 } 1402 1403 static void 1404 lapic_timer_stop(struct lapic *la) 1405 { 1406 uint32_t value; 1407 1408 if (la->la_timer_mode == LAT_MODE_DEADLINE) { 1409 wrmsr(MSR_TSC_DEADLINE, 0); 1410 mfence(); 1411 } else { 1412 value = la->lvt_timer_base; 1413 value &= ~APIC_LVTT_TM; 1414 value |= APIC_LVT_M; 1415 la->lvt_timer_last = value; 1416 lapic_write32(LAPIC_LVT_TIMER, value); 1417 } 1418 } 1419 1420 void 1421 lapic_handle_cmc(void) 1422 { 1423 1424 lapic_eoi(); 1425 cmc_intr(); 1426 } 1427 1428 /* 1429 * Called from the mca_init() to activate the CMC interrupt if this CPU is 1430 * responsible for monitoring any MC banks for CMC events. Since mca_init() 1431 * is called prior to lapic_setup() during boot, this just needs to unmask 1432 * this CPU's LVT_CMCI entry. 
1433 */ 1434 static void 1435 native_lapic_enable_cmc(void) 1436 { 1437 u_int apic_id; 1438 1439 #ifdef DEV_ATPIC 1440 if (!x2apic_mode && lapic_map == NULL) 1441 return; 1442 #endif 1443 apic_id = PCPU_GET(apic_id); 1444 KASSERT(lapics[apic_id].la_present, 1445 ("%s: missing APIC %u", __func__, apic_id)); 1446 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; 1447 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; 1448 if (bootverbose) 1449 printf("lapic%u: CMCI unmasked\n", apic_id); 1450 } 1451 1452 static int 1453 native_lapic_enable_mca_elvt(void) 1454 { 1455 u_int apic_id; 1456 uint32_t value; 1457 int elvt_count; 1458 1459 #ifdef DEV_ATPIC 1460 if (lapic_map == NULL) 1461 return (-1); 1462 #endif 1463 1464 apic_id = PCPU_GET(apic_id); 1465 KASSERT(lapics[apic_id].la_present, 1466 ("%s: missing APIC %u", __func__, apic_id)); 1467 elvt_count = amd_read_elvt_count(); 1468 if (elvt_count <= APIC_ELVT_MCA) 1469 return (-1); 1470 1471 value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); 1472 if ((value & APIC_LVT_M) == 0) { 1473 if (bootverbose) 1474 printf("AMD MCE Thresholding Extended LVT is already active\n"); 1475 return (APIC_ELVT_MCA); 1476 } 1477 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; 1478 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; 1479 if (bootverbose) 1480 printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); 1481 return (APIC_ELVT_MCA); 1482 } 1483 1484 void 1485 lapic_handle_error(void) 1486 { 1487 uint32_t esr; 1488 1489 /* 1490 * Read the contents of the error status register. Write to 1491 * the register first before reading from it to force the APIC 1492 * to update its value to indicate any errors that have 1493 * occurred since the previous write to the register. 
1494 */ 1495 lapic_write32(LAPIC_ESR, 0); 1496 esr = lapic_read32(LAPIC_ESR); 1497 1498 printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); 1499 lapic_eoi(); 1500 } 1501 1502 static u_int 1503 native_apic_cpuid(u_int apic_id) 1504 { 1505 #ifdef SMP 1506 return apic_cpuids[apic_id]; 1507 #else 1508 return 0; 1509 #endif 1510 } 1511 1512 /* Request a free IDT vector to be used by the specified IRQ. */ 1513 static u_int 1514 native_apic_alloc_vector(u_int apic_id, u_int irq) 1515 { 1516 u_int vector; 1517 1518 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1519 1520 /* 1521 * Search for a free vector. Currently we just use a very simple 1522 * algorithm to find the first free vector. 1523 */ 1524 mtx_lock_spin(&icu_lock); 1525 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1526 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) 1527 continue; 1528 lapics[apic_id].la_ioint_irqs[vector] = irq; 1529 mtx_unlock_spin(&icu_lock); 1530 return (vector + APIC_IO_INTS); 1531 } 1532 mtx_unlock_spin(&icu_lock); 1533 return (0); 1534 } 1535 1536 /* 1537 * Request 'count' free contiguous IDT vectors to be used by 'count' 1538 * IRQs. 'count' must be a power of two and the vectors will be 1539 * aligned on a boundary of 'align'. If the request cannot be 1540 * satisfied, 0 is returned. 1541 */ 1542 static u_int 1543 native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) 1544 { 1545 u_int first, run, vector; 1546 1547 KASSERT(powerof2(count), ("bad count")); 1548 KASSERT(powerof2(align), ("bad align")); 1549 KASSERT(align >= count, ("align < count")); 1550 #ifdef INVARIANTS 1551 for (run = 0; run < count; run++) 1552 KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", 1553 irqs[run], run)); 1554 #endif 1555 1556 /* 1557 * Search for 'count' free vectors. As with apic_alloc_vector(), 1558 * this just uses a simple first fit algorithm. 
1559 */ 1560 run = 0; 1561 first = 0; 1562 mtx_lock_spin(&icu_lock); 1563 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1564 1565 /* Vector is in use, end run. */ 1566 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { 1567 run = 0; 1568 first = 0; 1569 continue; 1570 } 1571 1572 /* Start a new run if run == 0 and vector is aligned. */ 1573 if (run == 0) { 1574 if ((vector & (align - 1)) != 0) 1575 continue; 1576 first = vector; 1577 } 1578 run++; 1579 1580 /* Keep looping if the run isn't long enough yet. */ 1581 if (run < count) 1582 continue; 1583 1584 /* Found a run, assign IRQs and return the first vector. */ 1585 for (vector = 0; vector < count; vector++) 1586 lapics[apic_id].la_ioint_irqs[first + vector] = 1587 irqs[vector]; 1588 mtx_unlock_spin(&icu_lock); 1589 return (first + APIC_IO_INTS); 1590 } 1591 mtx_unlock_spin(&icu_lock); 1592 printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); 1593 return (0); 1594 } 1595 1596 /* 1597 * Enable a vector for a particular apic_id. Since all lapics share idt 1598 * entries and ioint_handlers this enables the vector on all lapics. lapics 1599 * which do not have the vector configured would report spurious interrupts 1600 * should it fire. 1601 */ 1602 static void 1603 native_apic_enable_vector(u_int apic_id, u_int vector) 1604 { 1605 1606 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1607 KASSERT(ioint_handlers[vector / 32] != NULL, 1608 ("No ISR handler for vector %u", vector)); 1609 #ifdef KDTRACE_HOOKS 1610 KASSERT(vector != IDT_DTRACE_RET, 1611 ("Attempt to overwrite DTrace entry")); 1612 #endif 1613 setidt(vector, (pti ? 
ioint_pti_handlers : ioint_handlers)[vector / 32], 1614 SDT_APIC, SEL_KPL, GSEL_APIC); 1615 } 1616 1617 static void 1618 native_apic_disable_vector(u_int apic_id, u_int vector) 1619 { 1620 1621 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1622 #ifdef KDTRACE_HOOKS 1623 KASSERT(vector != IDT_DTRACE_RET, 1624 ("Attempt to overwrite DTrace entry")); 1625 #endif 1626 KASSERT(ioint_handlers[vector / 32] != NULL, 1627 ("No ISR handler for vector %u", vector)); 1628 #ifdef notyet 1629 /* 1630 * We can not currently clear the idt entry because other cpus 1631 * may have a valid vector at this offset. 1632 */ 1633 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 1634 SEL_KPL, GSEL_APIC); 1635 #endif 1636 } 1637 1638 /* Release an APIC vector when it's no longer in use. */ 1639 static void 1640 native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) 1641 { 1642 struct thread *td; 1643 1644 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1645 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1646 ("Vector %u does not map to an IRQ line", vector)); 1647 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1648 KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == 1649 irq, ("IRQ mismatch")); 1650 #ifdef KDTRACE_HOOKS 1651 KASSERT(vector != IDT_DTRACE_RET, 1652 ("Attempt to overwrite DTrace entry")); 1653 #endif 1654 1655 /* 1656 * Bind us to the cpu that owned the vector before freeing it so 1657 * we don't lose an interrupt delivery race. 
1658 */ 1659 td = curthread; 1660 if (!rebooting) { 1661 thread_lock(td); 1662 if (sched_is_bound(td)) 1663 panic("apic_free_vector: Thread already bound.\n"); 1664 sched_bind(td, apic_cpuid(apic_id)); 1665 thread_unlock(td); 1666 } 1667 mtx_lock_spin(&icu_lock); 1668 lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; 1669 mtx_unlock_spin(&icu_lock); 1670 if (!rebooting) { 1671 thread_lock(td); 1672 sched_unbind(td); 1673 thread_unlock(td); 1674 } 1675 } 1676 1677 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ 1678 static u_int 1679 apic_idt_to_irq(u_int apic_id, u_int vector) 1680 { 1681 int irq; 1682 1683 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1684 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1685 ("Vector %u does not map to an IRQ line", vector)); 1686 #ifdef KDTRACE_HOOKS 1687 KASSERT(vector != IDT_DTRACE_RET, 1688 ("Attempt to overwrite DTrace entry")); 1689 #endif 1690 irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; 1691 if (irq < 0) 1692 irq = 0; 1693 return (irq); 1694 } 1695 1696 #ifdef DDB 1697 /* 1698 * Dump data about APIC IDT vector mappings. 
1699 */ 1700 DB_SHOW_COMMAND(apic, db_show_apic) 1701 { 1702 struct intsrc *isrc; 1703 int i, verbose; 1704 u_int apic_id; 1705 u_int irq; 1706 1707 if (strcmp(modif, "vv") == 0) 1708 verbose = 2; 1709 else if (strcmp(modif, "v") == 0) 1710 verbose = 1; 1711 else 1712 verbose = 0; 1713 for (apic_id = 0; apic_id <= max_apic_id; apic_id++) { 1714 if (lapics[apic_id].la_present == 0) 1715 continue; 1716 db_printf("Interrupts bound to lapic %u\n", apic_id); 1717 for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { 1718 irq = lapics[apic_id].la_ioint_irqs[i]; 1719 if (irq == IRQ_FREE || irq == IRQ_SYSCALL) 1720 continue; 1721 #ifdef KDTRACE_HOOKS 1722 if (irq == IRQ_DTRACE_RET) 1723 continue; 1724 #endif 1725 #ifdef XENHVM 1726 if (irq == IRQ_EVTCHN) 1727 continue; 1728 #endif 1729 db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); 1730 if (irq == IRQ_TIMER) 1731 db_printf("lapic timer\n"); 1732 else if (irq < num_io_irqs) { 1733 isrc = intr_lookup_source(irq); 1734 if (isrc == NULL || verbose == 0) 1735 db_printf("IRQ %u\n", irq); 1736 else 1737 db_dump_intr_event(isrc->is_event, 1738 verbose == 2); 1739 } else 1740 db_printf("IRQ %u ???\n", irq); 1741 } 1742 } 1743 } 1744 1745 static void 1746 dump_mask(const char *prefix, uint32_t v, int base) 1747 { 1748 int i, first; 1749 1750 first = 1; 1751 for (i = 0; i < 32; i++) 1752 if (v & (1 << i)) { 1753 if (first) { 1754 db_printf("%s:", prefix); 1755 first = 0; 1756 } 1757 db_printf(" %02x", base + i); 1758 } 1759 if (!first) 1760 db_printf("\n"); 1761 } 1762 1763 /* Show info from the lapic regs for this CPU. 
 */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));

/* Dump one 32-vector slice of the ISR/TMR/IRR register arrays. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
1819 */ 1820 1821 static SLIST_HEAD(, apic_enumerator) enumerators = 1822 SLIST_HEAD_INITIALIZER(enumerators); 1823 static struct apic_enumerator *best_enum; 1824 1825 void 1826 apic_register_enumerator(struct apic_enumerator *enumerator) 1827 { 1828 #ifdef INVARIANTS 1829 struct apic_enumerator *apic_enum; 1830 1831 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1832 if (apic_enum == enumerator) 1833 panic("%s: Duplicate register of %s", __func__, 1834 enumerator->apic_name); 1835 } 1836 #endif 1837 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1838 } 1839 1840 /* 1841 * We have to look for CPU's very, very early because certain subsystems 1842 * want to know how many CPU's we have extremely early on in the boot 1843 * process. 1844 */ 1845 static void 1846 apic_init(void *dummy __unused) 1847 { 1848 struct apic_enumerator *enumerator; 1849 int retval, best; 1850 1851 /* We only support built in local APICs. */ 1852 if (!(cpu_feature & CPUID_APIC)) 1853 return; 1854 1855 /* Don't probe if APIC mode is disabled. */ 1856 if (resource_disabled("apic", 0)) 1857 return; 1858 1859 /* Probe all the enumerators to find the best match. */ 1860 best_enum = NULL; 1861 best = 0; 1862 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1863 retval = enumerator->apic_probe(); 1864 if (retval > 0) 1865 continue; 1866 if (best_enum == NULL || best < retval) { 1867 best_enum = enumerator; 1868 best = retval; 1869 } 1870 } 1871 if (best_enum == NULL) { 1872 if (bootverbose) 1873 printf("APIC: Could not find any APICs.\n"); 1874 #ifndef DEV_ATPIC 1875 panic("running without device atpic requires a local APIC"); 1876 #endif 1877 return; 1878 } 1879 1880 if (bootverbose) 1881 printf("APIC: Using the %s enumerator.\n", 1882 best_enum->apic_name); 1883 1884 #ifdef I686_CPU 1885 /* 1886 * To work around an errata, we disable the local APIC on some 1887 * CPUs during early startup. We need to turn the local APIC back 1888 * on on such CPUs now. 
1889 */ 1890 ppro_reenable_apic(); 1891 #endif 1892 1893 /* Probe the CPU's in the system. */ 1894 retval = best_enum->apic_probe_cpus(); 1895 if (retval != 0) 1896 printf("%s: Failed to probe CPUs: returned %d\n", 1897 best_enum->apic_name, retval); 1898 1899 } 1900 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1901 1902 /* 1903 * Setup the local APIC. We have to do this prior to starting up the APs 1904 * in the SMP case. 1905 */ 1906 static void 1907 apic_setup_local(void *dummy __unused) 1908 { 1909 int retval; 1910 1911 if (best_enum == NULL) 1912 return; 1913 1914 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1915 M_WAITOK | M_ZERO); 1916 1917 /* Initialize the local APIC. */ 1918 retval = best_enum->apic_setup_local(); 1919 if (retval != 0) 1920 printf("%s: Failed to setup the local APIC: returned %d\n", 1921 best_enum->apic_name, retval); 1922 } 1923 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1924 1925 /* 1926 * Setup the I/O APICs. 1927 */ 1928 static void 1929 apic_setup_io(void *dummy __unused) 1930 { 1931 int retval; 1932 1933 if (best_enum == NULL) 1934 return; 1935 1936 /* 1937 * Local APIC must be registered before other PICs and pseudo PICs 1938 * for proper suspend/resume order. 1939 */ 1940 intr_register_pic(&lapic_pic); 1941 1942 retval = best_enum->apic_setup_io(); 1943 if (retval != 0) 1944 printf("%s: Failed to setup I/O APICs: returned %d\n", 1945 best_enum->apic_name, retval); 1946 1947 /* 1948 * Finish setting up the local APIC on the BSP once we know 1949 * how to properly program the LINT pins. In particular, this 1950 * enables the EOI suppression mode, if LAPIC supports it and 1951 * user did not disable the mode. 1952 */ 1953 lapic_setup(1); 1954 if (bootverbose) 1955 lapic_dump("BSP"); 1956 1957 /* Enable the MSI "pic". 
*/ 1958 init_ops.msi_init(); 1959 1960 #ifdef XENHVM 1961 xen_intr_alloc_irqs(); 1962 #endif 1963 } 1964 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1965 1966 #ifdef SMP 1967 /* 1968 * Inter Processor Interrupt functions. The lapic_ipi_*() functions are 1969 * private to the MD code. The public interface for the rest of the 1970 * kernel is defined in mp_machdep.c. 1971 */ 1972 1973 /* 1974 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1975 * wait forever. 1976 */ 1977 static int 1978 native_lapic_ipi_wait(int delay) 1979 { 1980 uint64_t rx; 1981 1982 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1983 if (x2apic_mode) 1984 return (1); 1985 1986 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1987 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1988 APIC_DELSTAT_IDLE) 1989 return (1); 1990 ia32_pause(); 1991 } 1992 return (0); 1993 } 1994 1995 static void 1996 native_lapic_ipi_raw(register_t icrlo, u_int dest) 1997 { 1998 uint64_t icr; 1999 uint32_t vhi, vlo; 2000 register_t saveintr; 2001 2002 /* XXX: Need more sanity checking of icrlo? */ 2003 KASSERT(x2apic_mode || lapic_map != NULL, 2004 ("%s called too early", __func__)); 2005 KASSERT(x2apic_mode || 2006 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2007 ("%s: invalid dest field", __func__)); 2008 KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, 2009 ("%s: reserved bits set in ICR LO register", __func__)); 2010 2011 /* Set destination in ICR HI register if it is being used. */ 2012 if (!x2apic_mode) { 2013 saveintr = intr_disable(); 2014 icr = lapic_read_icr(); 2015 } 2016 2017 if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { 2018 if (x2apic_mode) { 2019 vhi = dest; 2020 } else { 2021 vhi = icr >> 32; 2022 vhi &= ~APIC_ID_MASK; 2023 vhi |= dest << APIC_ID_SHIFT; 2024 } 2025 } else { 2026 vhi = 0; 2027 } 2028 2029 /* Program the contents of the IPI and dispatch it. 
*/ 2030 if (x2apic_mode) { 2031 vlo = icrlo; 2032 } else { 2033 vlo = icr; 2034 vlo &= APIC_ICRLO_RESV_MASK; 2035 vlo |= icrlo; 2036 } 2037 lapic_write_icr(vhi, vlo); 2038 if (!x2apic_mode) 2039 intr_restore(saveintr); 2040 } 2041 2042 #define BEFORE_SPIN 50000 2043 #ifdef DETECT_DEADLOCK 2044 #define AFTER_SPIN 50 2045 #endif 2046 2047 static void 2048 native_lapic_ipi_vectored(u_int vector, int dest) 2049 { 2050 register_t icrlo, destfield; 2051 2052 KASSERT((vector & ~APIC_VECTOR_MASK) == 0, 2053 ("%s: invalid vector %d", __func__, vector)); 2054 2055 icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; 2056 2057 /* 2058 * NMI IPIs are just fake vectors used to send a NMI. Use special rules 2059 * regarding NMIs if passed, otherwise specify the vector. 2060 */ 2061 if (vector >= IPI_NMI_FIRST) 2062 icrlo |= APIC_DELMODE_NMI; 2063 else 2064 icrlo |= vector | APIC_DELMODE_FIXED; 2065 destfield = 0; 2066 switch (dest) { 2067 case APIC_IPI_DEST_SELF: 2068 icrlo |= APIC_DEST_SELF; 2069 break; 2070 case APIC_IPI_DEST_ALL: 2071 icrlo |= APIC_DEST_ALLISELF; 2072 break; 2073 case APIC_IPI_DEST_OTHERS: 2074 icrlo |= APIC_DEST_ALLESELF; 2075 break; 2076 default: 2077 KASSERT(x2apic_mode || 2078 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2079 ("%s: invalid destination 0x%x", __func__, dest)); 2080 destfield = dest; 2081 } 2082 2083 /* Wait for an earlier IPI to finish. */ 2084 if (!lapic_ipi_wait(BEFORE_SPIN)) { 2085 if (panicstr != NULL) 2086 return; 2087 else 2088 panic("APIC: Previous IPI is stuck"); 2089 } 2090 2091 lapic_ipi_raw(icrlo, destfield); 2092 2093 #ifdef DETECT_DEADLOCK 2094 /* Wait for IPI to be delivered. */ 2095 if (!lapic_ipi_wait(AFTER_SPIN)) { 2096 #ifdef needsattention 2097 /* 2098 * XXX FIXME: 2099 * 2100 * The above function waits for the message to actually be 2101 * delivered. 
It breaks out after an arbitrary timeout 2102 * since the message should eventually be delivered (at 2103 * least in theory) and that if it wasn't we would catch 2104 * the failure with the check above when the next IPI is 2105 * sent. 2106 * 2107 * We could skip this wait entirely, EXCEPT it probably 2108 * protects us from other routines that assume that the 2109 * message was delivered and acted upon when this function 2110 * returns. 2111 */ 2112 printf("APIC: IPI might be stuck\n"); 2113 #else /* !needsattention */ 2114 /* Wait until mesage is sent without a timeout. */ 2115 while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) 2116 ia32_pause(); 2117 #endif /* needsattention */ 2118 } 2119 #endif /* DETECT_DEADLOCK */ 2120 } 2121 2122 #endif /* SMP */ 2123 2124 /* 2125 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally 2126 * visible. 2127 * 2128 * Consider the case where an IPI is generated immediately after allocation: 2129 * vector = lapic_ipi_alloc(ipifunc); 2130 * ipi_selected(other_cpus, vector); 2131 * 2132 * In xAPIC mode a write to ICR_LO has serializing semantics because the 2133 * APIC page is mapped as an uncached region. In x2APIC mode there is an 2134 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases 2135 * the IDT slot update is globally visible before the IPI is delivered. 
2136 */ 2137 static int 2138 native_lapic_ipi_alloc(inthand_t *ipifunc) 2139 { 2140 struct gate_descriptor *ip; 2141 long func; 2142 int idx, vector; 2143 2144 KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), 2145 ("invalid ipifunc %p", ipifunc)); 2146 2147 vector = -1; 2148 mtx_lock_spin(&icu_lock); 2149 for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { 2150 ip = &idt[idx]; 2151 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2152 if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || 2153 (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { 2154 vector = idx; 2155 setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); 2156 break; 2157 } 2158 } 2159 mtx_unlock_spin(&icu_lock); 2160 return (vector); 2161 } 2162 2163 static void 2164 native_lapic_ipi_free(int vector) 2165 { 2166 struct gate_descriptor *ip; 2167 long func; 2168 2169 KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, 2170 ("%s: invalid vector %d", __func__, vector)); 2171 2172 mtx_lock_spin(&icu_lock); 2173 ip = &idt[vector]; 2174 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2175 KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && 2176 func != (uintptr_t)&IDTVEC(rsvd_pti), 2177 ("invalid idtfunc %#lx", func)); 2178 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 2179 SEL_KPL, GSEL_APIC); 2180 mtx_unlock_spin(&icu_lock); 2181 } 2182