1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 5 * Copyright (c) 1996, by Steve Passe 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. The name of the developer may NOT be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 3. Neither the name of the author nor the names of any co-contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Local APIC support on Pentium and later processors. 
34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_atpic.h" 40 #include "opt_hwpmc_hooks.h" 41 42 #include "opt_ddb.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bus.h> 47 #include <sys/kernel.h> 48 #include <sys/lock.h> 49 #include <sys/malloc.h> 50 #include <sys/mutex.h> 51 #include <sys/pcpu.h> 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/timeet.h> 57 58 #include <vm/vm.h> 59 #include <vm/pmap.h> 60 61 #include <x86/apicreg.h> 62 #include <machine/clock.h> 63 #include <machine/cpufunc.h> 64 #include <machine/cputypes.h> 65 #include <machine/frame.h> 66 #include <machine/intr_machdep.h> 67 #include <x86/apicvar.h> 68 #include <x86/mca.h> 69 #include <machine/md_var.h> 70 #include <machine/smp.h> 71 #include <machine/specialreg.h> 72 #include <x86/init.h> 73 74 #ifdef DDB 75 #include <sys/interrupt.h> 76 #include <ddb/ddb.h> 77 #endif 78 79 #ifdef __amd64__ 80 #define SDT_APIC SDT_SYSIGT 81 #define GSEL_APIC 0 82 #else 83 #define SDT_APIC SDT_SYS386IGT 84 #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 85 #endif 86 87 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items"); 88 89 /* Sanity checks on IDT vectors. */ 90 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 91 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 92 CTASSERT(APIC_LOCAL_INTS == 240); 93 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 94 95 /* Magic IRQ values for the timer and syscalls. */ 96 #define IRQ_TIMER (NUM_IO_INTS + 1) 97 #define IRQ_SYSCALL (NUM_IO_INTS + 2) 98 #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) 99 #define IRQ_EVTCHN (NUM_IO_INTS + 4) 100 101 enum lat_timer_mode { 102 LAT_MODE_UNDEF = 0, 103 LAT_MODE_PERIODIC = 1, 104 LAT_MODE_ONESHOT = 2, 105 LAT_MODE_DEADLINE = 3, 106 }; 107 108 /* 109 * Support for local APICs. 
Local APICs manage interrupts on each 110 * individual processor as opposed to I/O APICs which receive interrupts 111 * from I/O devices and then forward them on to the local APICs. 112 * 113 * Local APICs can also send interrupts to each other thus providing the 114 * mechanism for IPIs. 115 */ 116 117 struct lvt { 118 u_int lvt_edgetrigger:1; 119 u_int lvt_activehi:1; 120 u_int lvt_masked:1; 121 u_int lvt_active:1; 122 u_int lvt_mode:16; 123 u_int lvt_vector:8; 124 }; 125 126 struct lapic { 127 struct lvt la_lvts[APIC_LVT_MAX + 1]; 128 struct lvt la_elvts[APIC_ELVT_MAX + 1];; 129 u_int la_id:8; 130 u_int la_cluster:4; 131 u_int la_cluster_id:2; 132 u_int la_present:1; 133 u_long *la_timer_count; 134 uint64_t la_timer_period; 135 enum lat_timer_mode la_timer_mode; 136 uint32_t lvt_timer_base; 137 uint32_t lvt_timer_last; 138 /* Include IDT_SYSCALL to make indexing easier. */ 139 int la_ioint_irqs[APIC_NUM_IOINTS + 1]; 140 } static *lapics; 141 142 /* Global defaults for local APIC LVT entries. */ 143 static struct lvt lvts[APIC_LVT_MAX + 1] = { 144 { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ 145 { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ 146 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ 147 { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ 148 { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ 149 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ 150 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ 151 }; 152 153 /* Global defaults for AMD local APIC ELVT entries. 
*/ 154 static struct lvt elvts[APIC_ELVT_MAX + 1] = { 155 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 156 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, 157 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 158 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 159 }; 160 161 static inthand_t *ioint_handlers[] = { 162 NULL, /* 0 - 31 */ 163 IDTVEC(apic_isr1), /* 32 - 63 */ 164 IDTVEC(apic_isr2), /* 64 - 95 */ 165 IDTVEC(apic_isr3), /* 96 - 127 */ 166 IDTVEC(apic_isr4), /* 128 - 159 */ 167 IDTVEC(apic_isr5), /* 160 - 191 */ 168 IDTVEC(apic_isr6), /* 192 - 223 */ 169 IDTVEC(apic_isr7), /* 224 - 255 */ 170 }; 171 172 static inthand_t *ioint_pti_handlers[] = { 173 NULL, /* 0 - 31 */ 174 IDTVEC(apic_isr1_pti), /* 32 - 63 */ 175 IDTVEC(apic_isr2_pti), /* 64 - 95 */ 176 IDTVEC(apic_isr3_pti), /* 96 - 127 */ 177 IDTVEC(apic_isr4_pti), /* 128 - 159 */ 178 IDTVEC(apic_isr5_pti), /* 160 - 191 */ 179 IDTVEC(apic_isr6_pti), /* 192 - 223 */ 180 IDTVEC(apic_isr7_pti), /* 224 - 255 */ 181 }; 182 183 static u_int32_t lapic_timer_divisors[] = { 184 APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 185 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 186 }; 187 188 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); 189 190 volatile char *lapic_map; 191 vm_paddr_t lapic_paddr; 192 int x2apic_mode; 193 int lapic_eoi_suppression; 194 static int lapic_timer_tsc_deadline; 195 static u_long lapic_timer_divisor, count_freq; 196 static struct eventtimer lapic_et; 197 #ifdef SMP 198 static uint64_t lapic_ipi_wait_mult; 199 #endif 200 unsigned int max_apic_id; 201 202 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); 203 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); 204 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, 205 &lapic_eoi_suppression, 0, ""); 206 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, 207 &lapic_timer_tsc_deadline, 0, ""); 208 209 static void lapic_calibrate_initcount(struct lapic *la); 210 static void 
lapic_calibrate_deadline(struct lapic *la); 211 212 static uint32_t 213 lapic_read32(enum LAPIC_REGISTERS reg) 214 { 215 uint32_t res; 216 217 if (x2apic_mode) { 218 res = rdmsr32(MSR_APIC_000 + reg); 219 } else { 220 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); 221 } 222 return (res); 223 } 224 225 static void 226 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) 227 { 228 229 if (x2apic_mode) { 230 mfence(); 231 lfence(); 232 wrmsr(MSR_APIC_000 + reg, val); 233 } else { 234 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 235 } 236 } 237 238 static void 239 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) 240 { 241 242 if (x2apic_mode) { 243 wrmsr(MSR_APIC_000 + reg, val); 244 } else { 245 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; 246 } 247 } 248 249 #ifdef SMP 250 static uint64_t 251 lapic_read_icr(void) 252 { 253 uint64_t v; 254 uint32_t vhi, vlo; 255 256 if (x2apic_mode) { 257 v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); 258 } else { 259 vhi = lapic_read32(LAPIC_ICR_HI); 260 vlo = lapic_read32(LAPIC_ICR_LO); 261 v = ((uint64_t)vhi << 32) | vlo; 262 } 263 return (v); 264 } 265 266 static uint64_t 267 lapic_read_icr_lo(void) 268 { 269 270 return (lapic_read32(LAPIC_ICR_LO)); 271 } 272 273 static void 274 lapic_write_icr(uint32_t vhi, uint32_t vlo) 275 { 276 uint64_t v; 277 278 if (x2apic_mode) { 279 v = ((uint64_t)vhi << 32) | vlo; 280 mfence(); 281 wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); 282 } else { 283 lapic_write32(LAPIC_ICR_HI, vhi); 284 lapic_write32(LAPIC_ICR_LO, vlo); 285 } 286 } 287 #endif /* SMP */ 288 289 static void 290 native_lapic_enable_x2apic(void) 291 { 292 uint64_t apic_base; 293 294 apic_base = rdmsr(MSR_APICBASE); 295 apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; 296 wrmsr(MSR_APICBASE, apic_base); 297 } 298 299 static bool 300 native_lapic_is_x2apic(void) 301 { 302 uint64_t apic_base; 303 304 apic_base = rdmsr(MSR_APICBASE); 305 return ((apic_base & (APICBASE_X2APIC | 
APICBASE_ENABLED)) == 306 (APICBASE_X2APIC | APICBASE_ENABLED)); 307 } 308 309 static void lapic_enable(void); 310 static void lapic_resume(struct pic *pic, bool suspend_cancelled); 311 static void lapic_timer_oneshot(struct lapic *); 312 static void lapic_timer_oneshot_nointr(struct lapic *, uint32_t); 313 static void lapic_timer_periodic(struct lapic *); 314 static void lapic_timer_deadline(struct lapic *); 315 static void lapic_timer_stop(struct lapic *); 316 static void lapic_timer_set_divisor(u_int divisor); 317 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); 318 static int lapic_et_start(struct eventtimer *et, 319 sbintime_t first, sbintime_t period); 320 static int lapic_et_stop(struct eventtimer *et); 321 static u_int apic_idt_to_irq(u_int apic_id, u_int vector); 322 static void lapic_set_tpr(u_int vector); 323 324 struct pic lapic_pic = { .pic_resume = lapic_resume }; 325 326 /* Forward declarations for apic_ops */ 327 static void native_lapic_create(u_int apic_id, int boot_cpu); 328 static void native_lapic_init(vm_paddr_t addr); 329 static void native_lapic_xapic_mode(void); 330 static void native_lapic_setup(int boot); 331 static void native_lapic_dump(const char *str); 332 static void native_lapic_disable(void); 333 static void native_lapic_eoi(void); 334 static int native_lapic_id(void); 335 static int native_lapic_intr_pending(u_int vector); 336 static u_int native_apic_cpuid(u_int apic_id); 337 static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); 338 static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, 339 u_int count, u_int align); 340 static void native_apic_disable_vector(u_int apic_id, u_int vector); 341 static void native_apic_enable_vector(u_int apic_id, u_int vector); 342 static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); 343 static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, 344 u_int cluster_id); 345 static int native_lapic_enable_pmc(void); 346 
static void native_lapic_disable_pmc(void); 347 static void native_lapic_reenable_pmc(void); 348 static void native_lapic_enable_cmc(void); 349 static int native_lapic_enable_mca_elvt(void); 350 static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, 351 u_char masked); 352 static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, 353 uint32_t mode); 354 static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, 355 enum intr_polarity pol); 356 static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, 357 enum intr_trigger trigger); 358 #ifdef SMP 359 static void native_lapic_ipi_raw(register_t icrlo, u_int dest); 360 static void native_lapic_ipi_vectored(u_int vector, int dest); 361 static int native_lapic_ipi_wait(int delay); 362 #endif /* SMP */ 363 static int native_lapic_ipi_alloc(inthand_t *ipifunc); 364 static void native_lapic_ipi_free(int vector); 365 366 struct apic_ops apic_ops = { 367 .create = native_lapic_create, 368 .init = native_lapic_init, 369 .xapic_mode = native_lapic_xapic_mode, 370 .is_x2apic = native_lapic_is_x2apic, 371 .setup = native_lapic_setup, 372 .dump = native_lapic_dump, 373 .disable = native_lapic_disable, 374 .eoi = native_lapic_eoi, 375 .id = native_lapic_id, 376 .intr_pending = native_lapic_intr_pending, 377 .set_logical_id = native_lapic_set_logical_id, 378 .cpuid = native_apic_cpuid, 379 .alloc_vector = native_apic_alloc_vector, 380 .alloc_vectors = native_apic_alloc_vectors, 381 .enable_vector = native_apic_enable_vector, 382 .disable_vector = native_apic_disable_vector, 383 .free_vector = native_apic_free_vector, 384 .enable_pmc = native_lapic_enable_pmc, 385 .disable_pmc = native_lapic_disable_pmc, 386 .reenable_pmc = native_lapic_reenable_pmc, 387 .enable_cmc = native_lapic_enable_cmc, 388 .enable_mca_elvt = native_lapic_enable_mca_elvt, 389 #ifdef SMP 390 .ipi_raw = native_lapic_ipi_raw, 391 .ipi_vectored = native_lapic_ipi_vectored, 392 .ipi_wait = native_lapic_ipi_wait, 393 #endif 394 
.ipi_alloc = native_lapic_ipi_alloc, 395 .ipi_free = native_lapic_ipi_free, 396 .set_lvt_mask = native_lapic_set_lvt_mask, 397 .set_lvt_mode = native_lapic_set_lvt_mode, 398 .set_lvt_polarity = native_lapic_set_lvt_polarity, 399 .set_lvt_triggermode = native_lapic_set_lvt_triggermode, 400 }; 401 402 static uint32_t 403 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) 404 { 405 406 value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | 407 APIC_LVT_VECTOR); 408 if (lvt->lvt_edgetrigger == 0) 409 value |= APIC_LVT_TM; 410 if (lvt->lvt_activehi == 0) 411 value |= APIC_LVT_IIPP_INTALO; 412 if (lvt->lvt_masked) 413 value |= APIC_LVT_M; 414 value |= lvt->lvt_mode; 415 switch (lvt->lvt_mode) { 416 case APIC_LVT_DM_NMI: 417 case APIC_LVT_DM_SMI: 418 case APIC_LVT_DM_INIT: 419 case APIC_LVT_DM_EXTINT: 420 if (!lvt->lvt_edgetrigger && bootverbose) { 421 printf("lapic%u: Forcing LINT%u to edge trigger\n", 422 la->la_id, pin); 423 value &= ~APIC_LVT_TM; 424 } 425 /* Use a vector of 0. 
*/ 426 break; 427 case APIC_LVT_DM_FIXED: 428 value |= lvt->lvt_vector; 429 break; 430 default: 431 panic("bad APIC LVT delivery mode: %#x\n", value); 432 } 433 return (value); 434 } 435 436 static uint32_t 437 lvt_mode(struct lapic *la, u_int pin, uint32_t value) 438 { 439 struct lvt *lvt; 440 441 KASSERT(pin <= APIC_LVT_MAX, 442 ("%s: pin %u out of range", __func__, pin)); 443 if (la->la_lvts[pin].lvt_active) 444 lvt = &la->la_lvts[pin]; 445 else 446 lvt = &lvts[pin]; 447 448 return (lvt_mode_impl(la, lvt, pin, value)); 449 } 450 451 static uint32_t 452 elvt_mode(struct lapic *la, u_int idx, uint32_t value) 453 { 454 struct lvt *elvt; 455 456 KASSERT(idx <= APIC_ELVT_MAX, 457 ("%s: idx %u out of range", __func__, idx)); 458 459 elvt = &la->la_elvts[idx]; 460 KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx)); 461 KASSERT(elvt->lvt_edgetrigger, 462 ("%s: ELVT%u is not edge triggered", __func__, idx)); 463 KASSERT(elvt->lvt_activehi, 464 ("%s: ELVT%u is not active high", __func__, idx)); 465 return (lvt_mode_impl(la, elvt, idx, value)); 466 } 467 468 /* 469 * Map the local APIC and setup necessary interrupt vectors. 470 */ 471 static void 472 native_lapic_init(vm_paddr_t addr) 473 { 474 #ifdef SMP 475 uint64_t r, r1, r2, rx; 476 #endif 477 uint32_t ver; 478 u_int regs[4]; 479 int i, arat; 480 481 /* 482 * Enable x2APIC mode if possible. Map the local APIC 483 * registers page. 484 * 485 * Keep the LAPIC registers page mapped uncached for x2APIC 486 * mode too, to have direct map page attribute set to 487 * uncached. This is needed to work around CPU errata present 488 * on all Intel processors. 489 */ 490 KASSERT(trunc_page(addr) == addr, 491 ("local APIC not aligned on a page boundary")); 492 lapic_paddr = addr; 493 lapic_map = pmap_mapdev(addr, PAGE_SIZE); 494 if (x2apic_mode) { 495 native_lapic_enable_x2apic(); 496 lapic_map = NULL; 497 } 498 499 /* Setup the spurious interrupt handler. 
*/ 500 setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, 501 GSEL_APIC); 502 503 /* Perform basic initialization of the BSP's local APIC. */ 504 lapic_enable(); 505 506 /* Set BSP's per-CPU local APIC ID. */ 507 PCPU_SET(apic_id, lapic_id()); 508 509 /* Local APIC timer interrupt. */ 510 setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint), 511 SDT_APIC, SEL_KPL, GSEL_APIC); 512 513 /* Local APIC error interrupt. */ 514 setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint), 515 SDT_APIC, SEL_KPL, GSEL_APIC); 516 517 /* XXX: Thermal interrupt */ 518 519 /* Local APIC CMCI. */ 520 setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint), 521 SDT_APIC, SEL_KPL, GSEL_APIC); 522 523 if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { 524 arat = 0; 525 /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ 526 if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { 527 do_cpuid(0x06, regs); 528 if ((regs[0] & CPUTPM1_ARAT) != 0) 529 arat = 1; 530 } else if (cpu_vendor_id == CPU_VENDOR_AMD && 531 CPUID_TO_FAMILY(cpu_id) >= 0x12) { 532 arat = 1; 533 } 534 bzero(&lapic_et, sizeof(lapic_et)); 535 lapic_et.et_name = "LAPIC"; 536 lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | 537 ET_FLAGS_PERCPU; 538 lapic_et.et_quality = 600; 539 if (!arat) { 540 lapic_et.et_flags |= ET_FLAGS_C3STOP; 541 lapic_et.et_quality = 100; 542 } 543 if ((cpu_feature & CPUID_TSC) != 0 && 544 (cpu_feature2 & CPUID2_TSCDLT) != 0 && 545 tsc_is_invariant && tsc_freq != 0) { 546 lapic_timer_tsc_deadline = 1; 547 TUNABLE_INT_FETCH("hw.lapic_tsc_deadline", 548 &lapic_timer_tsc_deadline); 549 } 550 551 lapic_et.et_frequency = 0; 552 /* We don't know frequency yet, so trying to guess. 
*/ 553 lapic_et.et_min_period = 0x00001000LL; 554 lapic_et.et_max_period = SBT_1S; 555 lapic_et.et_start = lapic_et_start; 556 lapic_et.et_stop = lapic_et_stop; 557 lapic_et.et_priv = NULL; 558 et_register(&lapic_et); 559 } 560 561 /* 562 * Set lapic_eoi_suppression after lapic_enable(), to not 563 * enable suppression in the hardware prematurely. Note that 564 * we by default enable suppression even when system only has 565 * one IO-APIC, since EOI is broadcasted to all APIC agents, 566 * including CPUs, otherwise. 567 * 568 * It seems that at least some KVM versions report 569 * EOI_SUPPRESSION bit, but auto-EOI does not work. 570 */ 571 ver = lapic_read32(LAPIC_VERSION); 572 if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { 573 lapic_eoi_suppression = 1; 574 if (vm_guest == VM_GUEST_KVM) { 575 if (bootverbose) 576 printf( 577 "KVM -- disabling lapic eoi suppression\n"); 578 lapic_eoi_suppression = 0; 579 } 580 TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", 581 &lapic_eoi_suppression); 582 } 583 584 #ifdef SMP 585 #define LOOPS 100000 586 /* 587 * Calibrate the busy loop waiting for IPI ack in xAPIC mode. 588 * lapic_ipi_wait_mult contains the number of iterations which 589 * approximately delay execution for 1 microsecond (the 590 * argument to native_lapic_ipi_wait() is in microseconds). 591 * 592 * We assume that TSC is present and already measured. 593 * Possible TSC frequency jumps are irrelevant to the 594 * calibration loop below, the CPU clock management code is 595 * not yet started, and we do not enter sleep states. 596 */ 597 KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0, 598 ("TSC not initialized")); 599 if (!x2apic_mode) { 600 r = rdtsc(); 601 for (rx = 0; rx < LOOPS; rx++) { 602 (void)lapic_read_icr_lo(); 603 ia32_pause(); 604 } 605 r = rdtsc() - r; 606 r1 = tsc_freq * LOOPS; 607 r2 = r * 1000000; 608 lapic_ipi_wait_mult = r1 >= r2 ? 
r1 / r2 : 1; 609 if (bootverbose) { 610 printf("LAPIC: ipi_wait() us multiplier %ju (r %ju " 611 "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult, 612 (uintmax_t)r, (uintmax_t)tsc_freq); 613 } 614 } 615 #undef LOOPS 616 #endif /* SMP */ 617 } 618 619 /* 620 * Create a local APIC instance. 621 */ 622 static void 623 native_lapic_create(u_int apic_id, int boot_cpu) 624 { 625 int i; 626 627 if (apic_id > max_apic_id) { 628 printf("APIC: Ignoring local APIC with ID %d\n", apic_id); 629 if (boot_cpu) 630 panic("Can't ignore BSP"); 631 return; 632 } 633 KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", 634 apic_id)); 635 636 /* 637 * Assume no local LVT overrides and a cluster of 0 and 638 * intra-cluster ID of 0. 639 */ 640 lapics[apic_id].la_present = 1; 641 lapics[apic_id].la_id = apic_id; 642 for (i = 0; i <= APIC_LVT_MAX; i++) { 643 lapics[apic_id].la_lvts[i] = lvts[i]; 644 lapics[apic_id].la_lvts[i].lvt_active = 0; 645 } 646 for (i = 0; i <= APIC_ELVT_MAX; i++) { 647 lapics[apic_id].la_elvts[i] = elvts[i]; 648 lapics[apic_id].la_elvts[i].lvt_active = 0; 649 } 650 for (i = 0; i <= APIC_NUM_IOINTS; i++) 651 lapics[apic_id].la_ioint_irqs[i] = -1; 652 lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; 653 lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = 654 IRQ_TIMER; 655 #ifdef KDTRACE_HOOKS 656 lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = 657 IRQ_DTRACE_RET; 658 #endif 659 #ifdef XENHVM 660 lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; 661 #endif 662 663 664 #ifdef SMP 665 cpu_add(apic_id, boot_cpu); 666 #endif 667 } 668 669 static inline uint32_t 670 amd_read_ext_features(void) 671 { 672 uint32_t version; 673 674 if (cpu_vendor_id != CPU_VENDOR_AMD) 675 return (0); 676 version = lapic_read32(LAPIC_VERSION); 677 if ((version & APIC_VER_AMD_EXT_SPACE) != 0) 678 return (lapic_read32(LAPIC_EXT_FEATURES)); 679 else 680 return (0); 681 } 682 683 static inline uint32_t 684 
amd_read_elvt_count(void) 685 { 686 uint32_t extf; 687 uint32_t count; 688 689 extf = amd_read_ext_features(); 690 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 691 count = min(count, APIC_ELVT_MAX + 1); 692 return (count); 693 } 694 695 /* 696 * Dump contents of local APIC registers 697 */ 698 static void 699 native_lapic_dump(const char* str) 700 { 701 uint32_t version; 702 uint32_t maxlvt; 703 uint32_t extf; 704 int elvt_count; 705 int i; 706 707 version = lapic_read32(LAPIC_VERSION); 708 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 709 printf("cpu%d %s:\n", PCPU_GET(cpuid), str); 710 printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", 711 lapic_read32(LAPIC_ID), version, 712 lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); 713 if ((cpu_feature2 & CPUID2_X2APIC) != 0) 714 printf(" x2APIC: %d", x2apic_mode); 715 printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 716 lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), 717 lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); 718 printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", 719 lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), 720 lapic_read32(LAPIC_LVT_ERROR)); 721 if (maxlvt >= APIC_LVT_PMC) 722 printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); 723 printf("\n"); 724 if (maxlvt >= APIC_LVT_CMCI) 725 printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); 726 extf = amd_read_ext_features(); 727 if (extf != 0) { 728 printf(" AMD ext features: 0x%08x\n", extf); 729 elvt_count = amd_read_elvt_count(); 730 for (i = 0; i < elvt_count; i++) 731 printf(" AMD elvt%d: 0x%08x\n", i, 732 lapic_read32(LAPIC_EXT_LVT0 + i)); 733 } 734 } 735 736 static void 737 native_lapic_xapic_mode(void) 738 { 739 register_t saveintr; 740 741 saveintr = intr_disable(); 742 if (x2apic_mode) 743 native_lapic_enable_x2apic(); 744 intr_restore(saveintr); 745 } 746 747 static void 748 native_lapic_setup(int boot) 749 { 750 struct lapic *la; 751 
uint32_t version; 752 uint32_t maxlvt; 753 register_t saveintr; 754 char buf[MAXCOMLEN + 1]; 755 int elvt_count; 756 int i; 757 758 saveintr = intr_disable(); 759 760 la = &lapics[lapic_id()]; 761 KASSERT(la->la_present, ("missing APIC structure")); 762 version = lapic_read32(LAPIC_VERSION); 763 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 764 765 /* Initialize the TPR to allow all interrupts. */ 766 lapic_set_tpr(0); 767 768 /* Setup spurious vector and enable the local APIC. */ 769 lapic_enable(); 770 771 /* Program LINT[01] LVT entries. */ 772 lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, 773 lapic_read32(LAPIC_LVT_LINT0))); 774 lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, 775 lapic_read32(LAPIC_LVT_LINT1))); 776 777 /* Program the PMC LVT entry if present. */ 778 if (maxlvt >= APIC_LVT_PMC) { 779 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 780 LAPIC_LVT_PCINT)); 781 } 782 783 /* Program timer LVT and setup handler. */ 784 la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, 785 lapic_read32(LAPIC_LVT_TIMER)); 786 la->lvt_timer_last = la->lvt_timer_base; 787 lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); 788 if (boot) { 789 snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); 790 intrcnt_add(buf, &la->la_timer_count); 791 } 792 793 /* Calibrate the timer parameters using BSP. */ 794 if (boot && IS_BSP()) { 795 lapic_calibrate_initcount(la); 796 if (lapic_timer_tsc_deadline) 797 lapic_calibrate_deadline(la); 798 } 799 800 /* Setup the timer if configured. 
*/ 801 if (la->la_timer_mode != LAT_MODE_UNDEF) { 802 KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", 803 lapic_id())); 804 switch (la->la_timer_mode) { 805 case LAT_MODE_PERIODIC: 806 lapic_timer_set_divisor(lapic_timer_divisor); 807 lapic_timer_periodic(la); 808 break; 809 case LAT_MODE_ONESHOT: 810 lapic_timer_set_divisor(lapic_timer_divisor); 811 lapic_timer_oneshot(la); 812 break; 813 case LAT_MODE_DEADLINE: 814 lapic_timer_deadline(la); 815 break; 816 default: 817 panic("corrupted la_timer_mode %p %d", la, 818 la->la_timer_mode); 819 } 820 } 821 822 /* Program error LVT and clear any existing errors. */ 823 lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, 824 lapic_read32(LAPIC_LVT_ERROR))); 825 lapic_write32(LAPIC_ESR, 0); 826 827 /* XXX: Thermal LVT */ 828 829 /* Program the CMCI LVT entry if present. */ 830 if (maxlvt >= APIC_LVT_CMCI) { 831 lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, 832 lapic_read32(LAPIC_LVT_CMCI))); 833 } 834 835 elvt_count = amd_read_elvt_count(); 836 for (i = 0; i < elvt_count; i++) { 837 if (la->la_elvts[i].lvt_active) 838 lapic_write32(LAPIC_EXT_LVT0 + i, 839 elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); 840 } 841 842 intr_restore(saveintr); 843 } 844 845 static void 846 native_lapic_reenable_pmc(void) 847 { 848 #ifdef HWPMC_HOOKS 849 uint32_t value; 850 851 value = lapic_read32(LAPIC_LVT_PCINT); 852 value &= ~APIC_LVT_M; 853 lapic_write32(LAPIC_LVT_PCINT, value); 854 #endif 855 } 856 857 #ifdef HWPMC_HOOKS 858 static void 859 lapic_update_pmc(void *dummy) 860 { 861 struct lapic *la; 862 863 la = &lapics[lapic_id()]; 864 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 865 lapic_read32(LAPIC_LVT_PCINT))); 866 } 867 #endif 868 869 static int 870 native_lapic_enable_pmc(void) 871 { 872 #ifdef HWPMC_HOOKS 873 u_int32_t maxlvt; 874 875 /* Fail if the local APIC is not present. */ 876 if (!x2apic_mode && lapic_map == NULL) 877 return (0); 878 879 /* Fail if the PMC LVT is not present. 
*/ 880 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 881 if (maxlvt < APIC_LVT_PMC) 882 return (0); 883 884 lvts[APIC_LVT_PMC].lvt_masked = 0; 885 886 #ifdef EARLY_AP_STARTUP 887 MPASS(mp_ncpus == 1 || smp_started); 888 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 889 #else 890 #ifdef SMP 891 /* 892 * If hwpmc was loaded at boot time then the APs may not be 893 * started yet. In that case, don't forward the request to 894 * them as they will program the lvt when they start. 895 */ 896 if (smp_started) 897 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 898 else 899 #endif 900 lapic_update_pmc(NULL); 901 #endif 902 return (1); 903 #else 904 return (0); 905 #endif 906 } 907 908 static void 909 native_lapic_disable_pmc(void) 910 { 911 #ifdef HWPMC_HOOKS 912 u_int32_t maxlvt; 913 914 /* Fail if the local APIC is not present. */ 915 if (!x2apic_mode && lapic_map == NULL) 916 return; 917 918 /* Fail if the PMC LVT is not present. */ 919 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 920 if (maxlvt < APIC_LVT_PMC) 921 return; 922 923 lvts[APIC_LVT_PMC].lvt_masked = 1; 924 925 #ifdef SMP 926 /* The APs should always be started when hwpmc is unloaded. */ 927 KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); 928 #endif 929 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 930 #endif 931 } 932 933 static void 934 lapic_calibrate_initcount(struct lapic *la) 935 { 936 u_long value; 937 938 /* Start off with a divisor of 2 (power on reset default). */ 939 lapic_timer_divisor = 2; 940 /* Try to calibrate the local APIC timer. 
 */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		/* Timer ticks consumed in one second at this divisor. */
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		/* Counter ran all the way down; retry with a larger divisor. */
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

/*
 * In TSC-deadline mode the timer ticks at the TSC frequency, so there is
 * nothing to calibrate against DELAY(); just report the frequency.
 */
static void
lapic_calibrate_deadline(struct lapic *la __unused)
{

	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch this CPU's timer to a new mode (periodic, one-shot or
 * TSC-deadline) and update the event timer's frequency and the
 * min/max period bounds to match the clock driving the new mode.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		/* Deadlines are programmed in raw TSC counts. */
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	/* Period bounds in 32.32 fixed-point seconds (sbintime_t). */
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * Event timer "start" method: arm this CPU's LAPIC timer, either
 * periodically every 'period' or once after 'first' (both sbintime_t).
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* Event timer "stop" method: quiesce this CPU's LAPIC timer. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/* Return this CPU's local APIC ID. */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		/* In xAPIC mode the ID lives in the high bits of the register. */
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Return non-zero if 'vector' is pending delivery on this CPU. */
static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
	 */
	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
	return (irr & 1 << (vector % 32));
}

/* Record the logical destination (cluster / intra-cluster id) of an APIC. */
static void
native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}

/*
 * Set the mask bit of a LINT pin, either on one APIC or, with
 * APIC_ID_ALL, in the global defaults applied to every APIC.
 * Returns EINVAL for an out-of-range pin.
 */
static int
native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_masked = masked;
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
	return (0);
}

/*
 * Program the delivery mode of a LINT pin (globally or per-APIC) and
 * set the trigger/polarity/mask values that mode implies.
 */
static int
native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		/* ExtINT is the only mode that starts out masked. */
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}

/* Set the polarity of a LINT pin (globally or per-APIC). */
static int
native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
{

	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		lapics[apic_id].la_lvts[pin].lvt_activehi =
		    (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u polarity: %s\n", pin,
		    pol == INTR_POLARITY_HIGH ? "high" : "low");
	return (0);
}

/* Set the trigger mode of a LINT pin (globally or per-APIC). */
static int
native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
    enum intr_trigger trigger)
{

	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
		    (trigger == INTR_TRIGGER_EDGE);
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u trigger: %s\n", pin,
		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
	return (0);
}

/*
 * Adjust the TPR of the current CPU so that it blocks all interrupts below
 * the passed in vector.
 */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	lapic_write32(LAPIC_TPR, vector);
#else
	uint32_t tpr;

	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic_write32(LAPIC_TPR, tpr);
#endif
}

/* Signal end-of-interrupt to the local APIC. */
static void
native_lapic_eoi(void)
{

	lapic_write32_nofence(LAPIC_EOI, 0);
}

/* Dispatch an I/O interrupt vector to its interrupt source's handlers. */
void
lapic_handle_intr(int vector, struct trapframe *frame)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
	    vector));
	intr_execute_handlers(isrc, frame);
}

/* Interrupt handler for the local APIC timer. */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		/* Run the registered event timer callback. */
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}

/* Program the timer's divide configuration register. */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
	    ("lapic: invalid divisor %u", divisor));
	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
}

/* Arm an unmasked one-shot expiration of la_timer_period ticks. */
static void
lapic_timer_oneshot(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_ONE_SHOT;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/* Arm a one-shot count with the interrupt masked (used for calibration). */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, count);
}

/* Arm the timer to fire every la_timer_period ticks. */
static void
lapic_timer_periodic(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_PERIODIC;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/* Arm a TSC-deadline expiration la_timer_period TSC ticks from now. */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	/* Only touch the LVT register if the mode actually changed. */
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}

/* Stop the timer: clear the deadline MSR or mask the LVT entry. */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}

/* Interrupt handler for the corrected machine check interrupt (CMCI). */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}

/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
static void
native_lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: CMCI unmasked\n", apic_id);
}

/*
 * Unmask the AMD MCE Thresholding extended LVT on this CPU.  Returns the
 * ELVT index (APIC_ELVT_MCA) on success or -1 if the CPU does not expose
 * that extended LVT.
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		/* Already unmasked (e.g. by firmware); nothing to record. */
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}

/* Interrupt handler for the local APIC error interrupt. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}

/* Map an APIC ID to its logical CPU id. */
static u_int
native_apic_cpuid(u_int apic_id)
{
#ifdef SMP
	return apic_cpuids[apic_id];
#else
	return 0;
#endif
}

/* Request a free IDT vector to be used by the specified IRQ. */
static u_int
native_apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != -1)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	/* 0 signals that no free vector was found. */
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {

		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != -1) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if ((vector & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/* Found a run, assign IRQs and return the first vector. */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}

/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
static void
native_apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/* Pick the PTI-aware trampoline when page table isolation is on. */
	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}

static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}

/* Release an APIC vector when it's no longer in use. */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}

/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
static u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
	int irq;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
	if (irq < 0)
		irq = 0;
	return (irq);
}

#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 */
DB_SHOW_COMMAND(apic, db_show_apic)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "vv" dumps full interrupt event details, "v" a summary. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			/* Skip free slots and the magic pseudo-IRQs. */
			if (irq == -1 || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < NUM_IO_INTS) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}

/*
 * Print the vector number (base + bit index) of every bit set in a
 * 32-bit interrupt state register, prefixed with 'prefix'.  Prints
 * nothing when no bits are set.
 */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		if (v & (1 << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}

/* Show info from the lapic regs for this CPU.
 */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR));

/* Helper: dump one 32-vector slice of the ISR/TMR/IRR register banks. */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
#endif

/*
 * APIC probing support code.  This includes code to manage enumerators.
1797 */ 1798 1799 static SLIST_HEAD(, apic_enumerator) enumerators = 1800 SLIST_HEAD_INITIALIZER(enumerators); 1801 static struct apic_enumerator *best_enum; 1802 1803 void 1804 apic_register_enumerator(struct apic_enumerator *enumerator) 1805 { 1806 #ifdef INVARIANTS 1807 struct apic_enumerator *apic_enum; 1808 1809 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1810 if (apic_enum == enumerator) 1811 panic("%s: Duplicate register of %s", __func__, 1812 enumerator->apic_name); 1813 } 1814 #endif 1815 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1816 } 1817 1818 /* 1819 * We have to look for CPU's very, very early because certain subsystems 1820 * want to know how many CPU's we have extremely early on in the boot 1821 * process. 1822 */ 1823 static void 1824 apic_init(void *dummy __unused) 1825 { 1826 struct apic_enumerator *enumerator; 1827 int retval, best; 1828 1829 /* We only support built in local APICs. */ 1830 if (!(cpu_feature & CPUID_APIC)) 1831 return; 1832 1833 /* Don't probe if APIC mode is disabled. */ 1834 if (resource_disabled("apic", 0)) 1835 return; 1836 1837 /* Probe all the enumerators to find the best match. */ 1838 best_enum = NULL; 1839 best = 0; 1840 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1841 retval = enumerator->apic_probe(); 1842 if (retval > 0) 1843 continue; 1844 if (best_enum == NULL || best < retval) { 1845 best_enum = enumerator; 1846 best = retval; 1847 } 1848 } 1849 if (best_enum == NULL) { 1850 if (bootverbose) 1851 printf("APIC: Could not find any APICs.\n"); 1852 #ifndef DEV_ATPIC 1853 panic("running without device atpic requires a local APIC"); 1854 #endif 1855 return; 1856 } 1857 1858 if (bootverbose) 1859 printf("APIC: Using the %s enumerator.\n", 1860 best_enum->apic_name); 1861 1862 #ifdef I686_CPU 1863 /* 1864 * To work around an errata, we disable the local APIC on some 1865 * CPUs during early startup. We need to turn the local APIC back 1866 * on on such CPUs now. 
1867 */ 1868 ppro_reenable_apic(); 1869 #endif 1870 1871 /* Probe the CPU's in the system. */ 1872 retval = best_enum->apic_probe_cpus(); 1873 if (retval != 0) 1874 printf("%s: Failed to probe CPUs: returned %d\n", 1875 best_enum->apic_name, retval); 1876 1877 } 1878 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1879 1880 /* 1881 * Setup the local APIC. We have to do this prior to starting up the APs 1882 * in the SMP case. 1883 */ 1884 static void 1885 apic_setup_local(void *dummy __unused) 1886 { 1887 int retval; 1888 1889 if (best_enum == NULL) 1890 return; 1891 1892 lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC, 1893 M_WAITOK | M_ZERO); 1894 1895 /* Initialize the local APIC. */ 1896 retval = best_enum->apic_setup_local(); 1897 if (retval != 0) 1898 printf("%s: Failed to setup the local APIC: returned %d\n", 1899 best_enum->apic_name, retval); 1900 } 1901 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1902 1903 /* 1904 * Setup the I/O APICs. 1905 */ 1906 static void 1907 apic_setup_io(void *dummy __unused) 1908 { 1909 int retval; 1910 1911 if (best_enum == NULL) 1912 return; 1913 1914 /* 1915 * Local APIC must be registered before other PICs and pseudo PICs 1916 * for proper suspend/resume order. 1917 */ 1918 intr_register_pic(&lapic_pic); 1919 1920 retval = best_enum->apic_setup_io(); 1921 if (retval != 0) 1922 printf("%s: Failed to setup I/O APICs: returned %d\n", 1923 best_enum->apic_name, retval); 1924 1925 /* 1926 * Finish setting up the local APIC on the BSP once we know 1927 * how to properly program the LINT pins. In particular, this 1928 * enables the EOI suppression mode, if LAPIC support it and 1929 * user did not disabled the mode. 1930 */ 1931 lapic_setup(1); 1932 if (bootverbose) 1933 lapic_dump("BSP"); 1934 1935 /* Enable the MSI "pic". 
*/ 1936 init_ops.msi_init(); 1937 } 1938 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1939 1940 #ifdef SMP 1941 /* 1942 * Inter Processor Interrupt functions. The lapic_ipi_*() functions are 1943 * private to the MD code. The public interface for the rest of the 1944 * kernel is defined in mp_machdep.c. 1945 */ 1946 1947 /* 1948 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1949 * wait forever. 1950 */ 1951 static int 1952 native_lapic_ipi_wait(int delay) 1953 { 1954 uint64_t rx; 1955 1956 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1957 if (x2apic_mode) 1958 return (1); 1959 1960 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1961 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1962 APIC_DELSTAT_IDLE) 1963 return (1); 1964 ia32_pause(); 1965 } 1966 return (0); 1967 } 1968 1969 static void 1970 native_lapic_ipi_raw(register_t icrlo, u_int dest) 1971 { 1972 uint64_t icr; 1973 uint32_t vhi, vlo; 1974 register_t saveintr; 1975 1976 /* XXX: Need more sanity checking of icrlo? */ 1977 KASSERT(x2apic_mode || lapic_map != NULL, 1978 ("%s called too early", __func__)); 1979 KASSERT(x2apic_mode || 1980 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 1981 ("%s: invalid dest field", __func__)); 1982 KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, 1983 ("%s: reserved bits set in ICR LO register", __func__)); 1984 1985 /* Set destination in ICR HI register if it is being used. */ 1986 if (!x2apic_mode) { 1987 saveintr = intr_disable(); 1988 icr = lapic_read_icr(); 1989 } 1990 1991 if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { 1992 if (x2apic_mode) { 1993 vhi = dest; 1994 } else { 1995 vhi = icr >> 32; 1996 vhi &= ~APIC_ID_MASK; 1997 vhi |= dest << APIC_ID_SHIFT; 1998 } 1999 } else { 2000 vhi = 0; 2001 } 2002 2003 /* Program the contents of the IPI and dispatch it. 
*/ 2004 if (x2apic_mode) { 2005 vlo = icrlo; 2006 } else { 2007 vlo = icr; 2008 vlo &= APIC_ICRLO_RESV_MASK; 2009 vlo |= icrlo; 2010 } 2011 lapic_write_icr(vhi, vlo); 2012 if (!x2apic_mode) 2013 intr_restore(saveintr); 2014 } 2015 2016 #define BEFORE_SPIN 50000 2017 #ifdef DETECT_DEADLOCK 2018 #define AFTER_SPIN 50 2019 #endif 2020 2021 static void 2022 native_lapic_ipi_vectored(u_int vector, int dest) 2023 { 2024 register_t icrlo, destfield; 2025 2026 KASSERT((vector & ~APIC_VECTOR_MASK) == 0, 2027 ("%s: invalid vector %d", __func__, vector)); 2028 2029 icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; 2030 2031 /* 2032 * NMI IPIs are just fake vectors used to send a NMI. Use special rules 2033 * regarding NMIs if passed, otherwise specify the vector. 2034 */ 2035 if (vector >= IPI_NMI_FIRST) 2036 icrlo |= APIC_DELMODE_NMI; 2037 else 2038 icrlo |= vector | APIC_DELMODE_FIXED; 2039 destfield = 0; 2040 switch (dest) { 2041 case APIC_IPI_DEST_SELF: 2042 icrlo |= APIC_DEST_SELF; 2043 break; 2044 case APIC_IPI_DEST_ALL: 2045 icrlo |= APIC_DEST_ALLISELF; 2046 break; 2047 case APIC_IPI_DEST_OTHERS: 2048 icrlo |= APIC_DEST_ALLESELF; 2049 break; 2050 default: 2051 KASSERT(x2apic_mode || 2052 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2053 ("%s: invalid destination 0x%x", __func__, dest)); 2054 destfield = dest; 2055 } 2056 2057 /* Wait for an earlier IPI to finish. */ 2058 if (!lapic_ipi_wait(BEFORE_SPIN)) { 2059 if (panicstr != NULL) 2060 return; 2061 else 2062 panic("APIC: Previous IPI is stuck"); 2063 } 2064 2065 lapic_ipi_raw(icrlo, destfield); 2066 2067 #ifdef DETECT_DEADLOCK 2068 /* Wait for IPI to be delivered. */ 2069 if (!lapic_ipi_wait(AFTER_SPIN)) { 2070 #ifdef needsattention 2071 /* 2072 * XXX FIXME: 2073 * 2074 * The above function waits for the message to actually be 2075 * delivered. 
It breaks out after an arbitrary timeout 2076 * since the message should eventually be delivered (at 2077 * least in theory) and that if it wasn't we would catch 2078 * the failure with the check above when the next IPI is 2079 * sent. 2080 * 2081 * We could skip this wait entirely, EXCEPT it probably 2082 * protects us from other routines that assume that the 2083 * message was delivered and acted upon when this function 2084 * returns. 2085 */ 2086 printf("APIC: IPI might be stuck\n"); 2087 #else /* !needsattention */ 2088 /* Wait until mesage is sent without a timeout. */ 2089 while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) 2090 ia32_pause(); 2091 #endif /* needsattention */ 2092 } 2093 #endif /* DETECT_DEADLOCK */ 2094 } 2095 2096 #endif /* SMP */ 2097 2098 /* 2099 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally 2100 * visible. 2101 * 2102 * Consider the case where an IPI is generated immediately after allocation: 2103 * vector = lapic_ipi_alloc(ipifunc); 2104 * ipi_selected(other_cpus, vector); 2105 * 2106 * In xAPIC mode a write to ICR_LO has serializing semantics because the 2107 * APIC page is mapped as an uncached region. In x2APIC mode there is an 2108 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases 2109 * the IDT slot update is globally visible before the IPI is delivered. 
2110 */ 2111 static int 2112 native_lapic_ipi_alloc(inthand_t *ipifunc) 2113 { 2114 struct gate_descriptor *ip; 2115 long func; 2116 int idx, vector; 2117 2118 KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), 2119 ("invalid ipifunc %p", ipifunc)); 2120 2121 vector = -1; 2122 mtx_lock_spin(&icu_lock); 2123 for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { 2124 ip = &idt[idx]; 2125 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2126 if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || 2127 (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { 2128 vector = idx; 2129 setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); 2130 break; 2131 } 2132 } 2133 mtx_unlock_spin(&icu_lock); 2134 return (vector); 2135 } 2136 2137 static void 2138 native_lapic_ipi_free(int vector) 2139 { 2140 struct gate_descriptor *ip; 2141 long func; 2142 2143 KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, 2144 ("%s: invalid vector %d", __func__, vector)); 2145 2146 mtx_lock_spin(&icu_lock); 2147 ip = &idt[vector]; 2148 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2149 KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && 2150 func != (uintptr_t)&IDTVEC(rsvd_pti), 2151 ("invalid idtfunc %#lx", func)); 2152 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC, 2153 SEL_KPL, GSEL_APIC); 2154 mtx_unlock_spin(&icu_lock); 2155 } 2156