1 /*- 2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 3 * Copyright (c) 1996, by Steve Passe 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. The name of the developer may NOT be used to endorse or promote products 12 * derived from this software without specific prior written permission. 13 * 3. Neither the name of the author nor the names of any co-contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * Local APIC support on Pentium and later processors. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <x86/init.h>

#ifdef DDB
#include <sys/interrupt.h>
#include <ddb/ddb.h>
#endif

/*
 * IDT gate descriptor types and code selector used when installing the
 * local APIC interrupt handlers; these differ between amd64 and i386.
 */
#ifdef __amd64__
#define	SDT_APIC	SDT_SYSIGT
#define	SDT_APICT	SDT_SYSIGT
#define	GSEL_APIC	0
#else
#define	SDT_APIC	SDT_SYS386IGT
#define	SDT_APICT	SDT_SYS386TGT
#define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
#endif

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);

/* Magic IRQ values for the timer and syscalls. */
#define	IRQ_TIMER	(NUM_IO_INTS + 1)
#define	IRQ_SYSCALL	(NUM_IO_INTS + 2)
#define	IRQ_DTRACE_RET	(NUM_IO_INTS + 3)
#define	IRQ_EVTCHN	(NUM_IO_INTS + 4)

/* Operating mode of the per-CPU local APIC timer. */
enum lat_timer_mode {
	LAT_MODE_UNDEF = 0,	/* timer not configured yet */
	LAT_MODE_PERIODIC = 1,	/* auto-reloading periodic interrupts */
	LAT_MODE_ONESHOT = 2,	/* single count-down interrupt */
	LAT_MODE_DEADLINE = 3,	/* TSC-deadline (MSR-armed) mode */
};

/*
 * Support for local APICs.
Local APICs manage interrupts on each 107 * individual processor as opposed to I/O APICs which receive interrupts 108 * from I/O devices and then forward them on to the local APICs. 109 * 110 * Local APICs can also send interrupts to each other thus providing the 111 * mechanism for IPIs. 112 */ 113 114 struct lvt { 115 u_int lvt_edgetrigger:1; 116 u_int lvt_activehi:1; 117 u_int lvt_masked:1; 118 u_int lvt_active:1; 119 u_int lvt_mode:16; 120 u_int lvt_vector:8; 121 }; 122 123 struct lapic { 124 struct lvt la_lvts[APIC_LVT_MAX + 1]; 125 struct lvt la_elvts[APIC_ELVT_MAX + 1];; 126 u_int la_id:8; 127 u_int la_cluster:4; 128 u_int la_cluster_id:2; 129 u_int la_present:1; 130 u_long *la_timer_count; 131 uint64_t la_timer_period; 132 enum lat_timer_mode la_timer_mode; 133 uint32_t lvt_timer_base; 134 uint32_t lvt_timer_last; 135 /* Include IDT_SYSCALL to make indexing easier. */ 136 int la_ioint_irqs[APIC_NUM_IOINTS + 1]; 137 } static lapics[MAX_APIC_ID + 1]; 138 139 /* Global defaults for local APIC LVT entries. */ 140 static struct lvt lvts[APIC_LVT_MAX + 1] = { 141 { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ 142 { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ 143 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ 144 { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ 145 { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ 146 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ 147 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ 148 }; 149 150 /* Global defaults for AMD local APIC ELVT entries. 
 */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};

/*
 * Interrupt handler entry points for I/O APIC interrupt vectors, one
 * handler per block of 32 vectors (index is vector / 32).  Vectors
 * 0-31 are CPU exceptions and have no APIC handler.
 */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};


/* Timer divide-configuration register encodings, in increasing divisor order. */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd);

volatile char *lapic_map;	/* xAPIC MMIO window; NULL in x2APIC mode */
vm_paddr_t lapic_paddr;		/* physical address of the local APIC page */
int x2apic_mode;		/* non-zero when running in x2APIC (MSR) mode */
int lapic_eoi_suppression;	/* non-zero when EOI broadcast suppression on */
static int lapic_timer_tsc_deadline;	/* non-zero to use TSC-deadline timer */
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
/* Busy-loop iterations approximating one microsecond in ipi_wait(). */
static uint64_t lapic_ipi_wait_mult;
#endif

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");

/*
 * Read a 32-bit local APIC register: via MSR in x2APIC mode, via the
 * memory-mapped register window otherwise.
 */
static uint32_t
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/*
 * Write a 32-bit local APIC register.  x2APIC MSR writes are not
 * serializing, so an mfence orders preceding stores before the write;
 * MMIO writes to the uncached APIC page need no extra fence.
 */
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}
/*
 * Write a 32-bit local APIC register without the x2APIC ordering fence.
 * For callers that do not require ordering against earlier stores.
 */
static void
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
/*
 * Read the full 64-bit interrupt command register.  In xAPIC mode the
 * ICR is split across the ICR_HI/ICR_LO register pair.
 */
static uint64_t
lapic_read_icr(void)
{
	uint64_t v;
	uint32_t vhi, vlo;

	if (x2apic_mode) {
		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
	} else {
		vhi = lapic_read32(LAPIC_ICR_HI);
		vlo = lapic_read32(LAPIC_ICR_LO);
		v = ((uint64_t)vhi << 32) | vlo;
	}
	return (v);
}

/* Read only the low half of the ICR. */
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

/*
 * Write the ICR.  In xAPIC mode the destination (high) half is written
 * before the low half, since the low-half write triggers the IPI; in
 * x2APIC mode both halves go out in one MSR write.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
#endif /* SMP */

/* Enable the local APIC and x2APIC mode via the APIC base MSR. */
static void
native_lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* Report whether the APIC base MSR shows x2APIC mode already enabled. */
static bool
native_lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
	    (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void	lapic_enable(void);
static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
static void	lapic_timer_oneshot(struct lapic *);
static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void	lapic_timer_periodic(struct lapic *);
static void	lapic_timer_deadline(struct lapic *);
static void	lapic_timer_stop(struct lapic *);
static void	lapic_timer_set_divisor(u_int divisor);
static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t
    value);
static int	lapic_et_start(struct eventtimer *et,
    sbintime_t first, sbintime_t period);
static int	lapic_et_stop(struct eventtimer *et);
static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
static void	lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/* Forward declarations for apic_ops */
static void	native_lapic_create(u_int apic_id, int boot_cpu);
static void	native_lapic_init(vm_paddr_t addr);
static void	native_lapic_xapic_mode(void);
static void	native_lapic_setup(int boot);
static void	native_lapic_dump(const char *str);
static void	native_lapic_disable(void);
static void	native_lapic_eoi(void);
static int	native_lapic_id(void);
static int	native_lapic_intr_pending(u_int vector);
static u_int	native_apic_cpuid(u_int apic_id);
static u_int	native_apic_alloc_vector(u_int apic_id, u_int irq);
static u_int	native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
    u_int count, u_int align);
static void	native_apic_disable_vector(u_int apic_id, u_int vector);
static void	native_apic_enable_vector(u_int apic_id, u_int vector);
static void	native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
static void	native_lapic_set_logical_id(u_int apic_id, u_int cluster,
    u_int cluster_id);
static int	native_lapic_enable_pmc(void);
static void	native_lapic_disable_pmc(void);
static void	native_lapic_reenable_pmc(void);
static void	native_lapic_enable_cmc(void);
static int	native_lapic_enable_mca_elvt(void);
static int	native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
    u_char masked);
static int	native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
    uint32_t mode);
static int	native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
    enum intr_polarity pol);
static int	native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
    enum intr_trigger trigger);
#ifdef SMP
static void	native_lapic_ipi_raw(register_t icrlo, u_int dest);
static void	native_lapic_ipi_vectored(u_int vector, int dest);
static int	native_lapic_ipi_wait(int delay);
#endif /* SMP */
static int	native_lapic_ipi_alloc(inthand_t *ipifunc);
static void	native_lapic_ipi_free(int vector);

/* Dispatch table used by the machine-independent APIC wrappers. */
struct apic_ops apic_ops = {
	.create = native_lapic_create,
	.init = native_lapic_init,
	.xapic_mode = native_lapic_xapic_mode,
	.is_x2apic = native_lapic_is_x2apic,
	.setup = native_lapic_setup,
	.dump = native_lapic_dump,
	.disable = native_lapic_disable,
	.eoi = native_lapic_eoi,
	.id = native_lapic_id,
	.intr_pending = native_lapic_intr_pending,
	.set_logical_id = native_lapic_set_logical_id,
	.cpuid = native_apic_cpuid,
	.alloc_vector = native_apic_alloc_vector,
	.alloc_vectors = native_apic_alloc_vectors,
	.enable_vector = native_apic_enable_vector,
	.disable_vector = native_apic_disable_vector,
	.free_vector = native_apic_free_vector,
	.enable_pmc = native_lapic_enable_pmc,
	.disable_pmc = native_lapic_disable_pmc,
	.reenable_pmc = native_lapic_reenable_pmc,
	.enable_cmc = native_lapic_enable_cmc,
	.enable_mca_elvt = native_lapic_enable_mca_elvt,
#ifdef SMP
	.ipi_raw = native_lapic_ipi_raw,
	.ipi_vectored = native_lapic_ipi_vectored,
	.ipi_wait = native_lapic_ipi_wait,
#endif
	.ipi_alloc = native_lapic_ipi_alloc,
	.ipi_free = native_lapic_ipi_free,
	.set_lvt_mask = native_lapic_set_lvt_mask,
	.set_lvt_mode = native_lapic_set_lvt_mode,
	.set_lvt_polarity = native_lapic_set_lvt_polarity,
	.set_lvt_triggermode = native_lapic_set_lvt_triggermode,
};

/*
 * Compute the hardware register value for the LVT entry described by
 * "lvt", replacing the mask, trigger mode, polarity, delivery mode and
 * vector fields of "value" while preserving its remaining bits.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
{

	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/*
		 * NOTE(review): the trigger-mode bit is only cleared when
		 * bootverbose is set, so the "forcing" is skipped on quiet
		 * boots -- looks suspicious; verify intent.
		 */
		if (!lvt->lvt_edgetrigger && bootverbose) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value &= ~APIC_LVT_TM;
		}
		/* Use a vector of 0. */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}

/*
 * Compute the LVT register value for "pin", using the per-CPU override
 * if one is active and the global default otherwise.
 */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX,
	    ("%s: pin %u out of range", __func__, pin));
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	return (lvt_mode_impl(la, lvt, pin, value));
}

/*
 * Compute the register value for AMD extended LVT entry "idx".  ELVT
 * entries are only programmed when active, edge triggered, active high.
 */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local APIC and setup necessary interrupt vectors.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	u_int regs[4];
	int i, arat;

	/*
	 * Enable x2APIC mode if possible. Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC);

	/* Register the event timer unless disabled by the "apic.clock" hint. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		arat = 0;
		/* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */
		if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) {
			do_cpuid(0x06, regs);
			if ((regs[0] & CPUTPM1_ARAT) != 0)
				arat = 1;
		}
		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer stops in deep C-states: lower its quality. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
				    "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}

/*
 * Create a local APIC instance.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	if (apic_id > MAX_APIC_ID) {
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
		lapics[apic_id].la_ioint_irqs[i] = -1;
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif


#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}

/*
 * Return the AMD extended features register, or 0 on non-AMD CPUs or
 * when the extended APIC register space is not advertised.
 */
static inline uint32_t
amd_read_ext_features(void)
{
	uint32_t version;

	if (cpu_vendor_id != CPU_VENDOR_AMD)
		return (0);
	version = lapic_read32(LAPIC_VERSION);
	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
		return (lapic_read32(LAPIC_EXT_FEATURES));
	else
		return (0);
}

static inline uint32_t
amd_read_elvt_count(void) 661 { 662 uint32_t extf; 663 uint32_t count; 664 665 extf = amd_read_ext_features(); 666 count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT; 667 count = min(count, APIC_ELVT_MAX + 1); 668 return (count); 669 } 670 671 /* 672 * Dump contents of local APIC registers 673 */ 674 static void 675 native_lapic_dump(const char* str) 676 { 677 uint32_t version; 678 uint32_t maxlvt; 679 uint32_t extf; 680 int elvt_count; 681 int i; 682 683 version = lapic_read32(LAPIC_VERSION); 684 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 685 printf("cpu%d %s:\n", PCPU_GET(cpuid), str); 686 printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", 687 lapic_read32(LAPIC_ID), version, 688 lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); 689 if ((cpu_feature2 & CPUID2_X2APIC) != 0) 690 printf(" x2APIC: %d", x2apic_mode); 691 printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 692 lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), 693 lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); 694 printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", 695 lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), 696 lapic_read32(LAPIC_LVT_ERROR)); 697 if (maxlvt >= APIC_LVT_PMC) 698 printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); 699 printf("\n"); 700 if (maxlvt >= APIC_LVT_CMCI) 701 printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); 702 extf = amd_read_ext_features(); 703 if (extf != 0) { 704 printf(" AMD ext features: 0x%08x\n", extf); 705 elvt_count = amd_read_elvt_count(); 706 for (i = 0; i < elvt_count; i++) 707 printf(" AMD elvt%d: 0x%08x\n", i, 708 lapic_read32(LAPIC_EXT_LVT0 + i)); 709 } 710 } 711 712 static void 713 native_lapic_xapic_mode(void) 714 { 715 register_t saveintr; 716 717 saveintr = intr_disable(); 718 if (x2apic_mode) 719 native_lapic_enable_x2apic(); 720 intr_restore(saveintr); 721 } 722 723 static void 724 native_lapic_setup(int boot) 725 { 726 struct lapic *la; 727 
uint32_t version; 728 uint32_t maxlvt; 729 register_t saveintr; 730 char buf[MAXCOMLEN + 1]; 731 int elvt_count; 732 int i; 733 734 saveintr = intr_disable(); 735 736 la = &lapics[lapic_id()]; 737 KASSERT(la->la_present, ("missing APIC structure")); 738 version = lapic_read32(LAPIC_VERSION); 739 maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 740 741 /* Initialize the TPR to allow all interrupts. */ 742 lapic_set_tpr(0); 743 744 /* Setup spurious vector and enable the local APIC. */ 745 lapic_enable(); 746 747 /* Program LINT[01] LVT entries. */ 748 lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, 749 lapic_read32(LAPIC_LVT_LINT0))); 750 lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, 751 lapic_read32(LAPIC_LVT_LINT1))); 752 753 /* Program the PMC LVT entry if present. */ 754 if (maxlvt >= APIC_LVT_PMC) { 755 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 756 LAPIC_LVT_PCINT)); 757 } 758 759 /* Program timer LVT and setup handler. */ 760 la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, 761 lapic_read32(LAPIC_LVT_TIMER)); 762 la->lvt_timer_last = la->lvt_timer_base; 763 lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); 764 if (boot) { 765 snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); 766 intrcnt_add(buf, &la->la_timer_count); 767 } 768 769 /* Setup the timer if configured. */ 770 if (la->la_timer_mode != LAT_MODE_UNDEF) { 771 KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", 772 lapic_id())); 773 switch (la->la_timer_mode) { 774 case LAT_MODE_PERIODIC: 775 lapic_timer_set_divisor(lapic_timer_divisor); 776 lapic_timer_periodic(la); 777 break; 778 case LAT_MODE_ONESHOT: 779 lapic_timer_set_divisor(lapic_timer_divisor); 780 lapic_timer_oneshot(la); 781 break; 782 case LAT_MODE_DEADLINE: 783 lapic_timer_deadline(la); 784 break; 785 default: 786 panic("corrupted la_timer_mode %p %d", la, 787 la->la_timer_mode); 788 } 789 } 790 791 /* Program error LVT and clear any existing errors. 
*/ 792 lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, 793 lapic_read32(LAPIC_LVT_ERROR))); 794 lapic_write32(LAPIC_ESR, 0); 795 796 /* XXX: Thermal LVT */ 797 798 /* Program the CMCI LVT entry if present. */ 799 if (maxlvt >= APIC_LVT_CMCI) { 800 lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, 801 lapic_read32(LAPIC_LVT_CMCI))); 802 } 803 804 elvt_count = amd_read_elvt_count(); 805 for (i = 0; i < elvt_count; i++) { 806 if (la->la_elvts[i].lvt_active) 807 lapic_write32(LAPIC_EXT_LVT0 + i, 808 elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i))); 809 } 810 811 intr_restore(saveintr); 812 } 813 814 static void 815 native_lapic_reenable_pmc(void) 816 { 817 #ifdef HWPMC_HOOKS 818 uint32_t value; 819 820 value = lapic_read32(LAPIC_LVT_PCINT); 821 value &= ~APIC_LVT_M; 822 lapic_write32(LAPIC_LVT_PCINT, value); 823 #endif 824 } 825 826 #ifdef HWPMC_HOOKS 827 static void 828 lapic_update_pmc(void *dummy) 829 { 830 struct lapic *la; 831 832 la = &lapics[lapic_id()]; 833 lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, 834 lapic_read32(LAPIC_LVT_PCINT))); 835 } 836 #endif 837 838 static int 839 native_lapic_enable_pmc(void) 840 { 841 #ifdef HWPMC_HOOKS 842 u_int32_t maxlvt; 843 844 /* Fail if the local APIC is not present. */ 845 if (!x2apic_mode && lapic_map == NULL) 846 return (0); 847 848 /* Fail if the PMC LVT is not present. */ 849 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 850 if (maxlvt < APIC_LVT_PMC) 851 return (0); 852 853 lvts[APIC_LVT_PMC].lvt_masked = 0; 854 855 #ifdef EARLY_AP_STARTUP 856 MPASS(mp_ncpus == 1 || smp_started); 857 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 858 #else 859 #ifdef SMP 860 /* 861 * If hwpmc was loaded at boot time then the APs may not be 862 * started yet. In that case, don't forward the request to 863 * them as they will program the lvt when they start. 
864 */ 865 if (smp_started) 866 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 867 else 868 #endif 869 lapic_update_pmc(NULL); 870 #endif 871 return (1); 872 #else 873 return (0); 874 #endif 875 } 876 877 static void 878 native_lapic_disable_pmc(void) 879 { 880 #ifdef HWPMC_HOOKS 881 u_int32_t maxlvt; 882 883 /* Fail if the local APIC is not present. */ 884 if (!x2apic_mode && lapic_map == NULL) 885 return; 886 887 /* Fail if the PMC LVT is not present. */ 888 maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; 889 if (maxlvt < APIC_LVT_PMC) 890 return; 891 892 lvts[APIC_LVT_PMC].lvt_masked = 1; 893 894 #ifdef SMP 895 /* The APs should always be started when hwpmc is unloaded. */ 896 KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); 897 #endif 898 smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); 899 #endif 900 } 901 902 static void 903 lapic_calibrate_initcount(struct eventtimer *et, struct lapic *la) 904 { 905 u_long value; 906 907 /* Start off with a divisor of 2 (power on reset default). */ 908 lapic_timer_divisor = 2; 909 /* Try to calibrate the local APIC timer. 
 */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

/* Report the TSC frequency used for TSC-deadline mode when verbose. */
static void
lapic_calibrate_deadline(struct eventtimer *et, struct lapic *la __unused)
{

	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch this CPU's timer to "newmode" and update the event timer's
 * frequency and period bounds to match.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/* Event timer "start" method: arm this CPU's APIC timer. */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (et->et_frequency == 0) {
		/* First use: measure the timer frequency. */
		lapic_calibrate_initcount(et, la);
		if (lapic_timer_tsc_deadline)
			lapic_calibrate_deadline(et, la);
	}
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* Event timer "stop" method: halt this CPU's APIC timer. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/* Return the local APIC ID of the current CPU. */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		/* In xAPIC mode the ID lives in the top byte. */
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Return non-zero if "vector" is pending in this CPU's IRR. */
static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
	 */
	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
	return (irr & 1 << (vector % 32));
}

/* Record the logical cluster/ID assignment for "apic_id". */
static void
native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}

/*
 * Mask or unmask LVT entry "pin" for one APIC, or for the global
 * defaults when apic_id is APIC_ID_ALL.
 */
static int
native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
{

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_masked = masked;
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	if (bootverbose)
		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
	return (0);
}

/*
 * Set the delivery mode of LVT entry "pin" for one APIC, or for the
 * global defaults when apic_id is APIC_ID_ALL.  Non-fixed modes force
 * edge trigger and active-high polarity.
 */
static int
native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		/* ExtINT entries stay masked until explicitly enabled. */
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}

/*
 * Set the polarity of LVT entry "pin".  (Function continues beyond the
 * end of this view.)
 */
static int
native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
{

	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lapics[apic_id].la_lvts[pin].lvt_active = 1;
		lapics[apic_id].la_lvts[pin].lvt_activehi =
		    (pol == INTR_POLARITY_HIGH);
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
1180 if (bootverbose) 1181 printf(" LINT%u polarity: %s\n", pin, 1182 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1183 return (0); 1184 } 1185 1186 static int 1187 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1188 enum intr_trigger trigger) 1189 { 1190 1191 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1192 return (EINVAL); 1193 if (apic_id == APIC_ID_ALL) { 1194 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1195 if (bootverbose) 1196 printf("lapic:"); 1197 } else { 1198 KASSERT(lapics[apic_id].la_present, 1199 ("%s: missing APIC %u", __func__, apic_id)); 1200 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1201 (trigger == INTR_TRIGGER_EDGE); 1202 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1203 if (bootverbose) 1204 printf("lapic%u:", apic_id); 1205 } 1206 if (bootverbose) 1207 printf(" LINT%u trigger: %s\n", pin, 1208 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1209 return (0); 1210 } 1211 1212 /* 1213 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1214 * the passed in vector. 1215 */ 1216 static void 1217 lapic_set_tpr(u_int vector) 1218 { 1219 #ifdef CHEAP_TPR 1220 lapic_write32(LAPIC_TPR, vector); 1221 #else 1222 uint32_t tpr; 1223 1224 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1225 tpr |= vector; 1226 lapic_write32(LAPIC_TPR, tpr); 1227 #endif 1228 } 1229 1230 static void 1231 native_lapic_eoi(void) 1232 { 1233 1234 lapic_write32_nofence(LAPIC_EOI, 0); 1235 } 1236 1237 void 1238 lapic_handle_intr(int vector, struct trapframe *frame) 1239 { 1240 struct intsrc *isrc; 1241 1242 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1243 vector)); 1244 intr_execute_handlers(isrc, frame); 1245 } 1246 1247 void 1248 lapic_handle_timer(struct trapframe *frame) 1249 { 1250 struct lapic *la; 1251 struct trapframe *oldframe; 1252 struct thread *td; 1253 1254 /* Send EOI first thing. 
*/ 1255 lapic_eoi(); 1256 1257 #if defined(SMP) && !defined(SCHED_ULE) 1258 /* 1259 * Don't do any accounting for the disabled HTT cores, since it 1260 * will provide misleading numbers for the userland. 1261 * 1262 * No locking is necessary here, since even if we lose the race 1263 * when hlt_cpus_mask changes it is not a big deal, really. 1264 * 1265 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask 1266 * and unlike other schedulers it actually schedules threads to 1267 * those CPUs. 1268 */ 1269 if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) 1270 return; 1271 #endif 1272 1273 /* Look up our local APIC structure for the tick counters. */ 1274 la = &lapics[PCPU_GET(apic_id)]; 1275 (*la->la_timer_count)++; 1276 critical_enter(); 1277 if (lapic_et.et_active) { 1278 td = curthread; 1279 td->td_intr_nesting_level++; 1280 oldframe = td->td_intr_frame; 1281 td->td_intr_frame = frame; 1282 lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); 1283 td->td_intr_frame = oldframe; 1284 td->td_intr_nesting_level--; 1285 } 1286 critical_exit(); 1287 } 1288 1289 static void 1290 lapic_timer_set_divisor(u_int divisor) 1291 { 1292 1293 KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); 1294 KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors), 1295 ("lapic: invalid divisor %u", divisor)); 1296 lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); 1297 } 1298 1299 static void 1300 lapic_timer_oneshot(struct lapic *la) 1301 { 1302 uint32_t value; 1303 1304 value = la->lvt_timer_base; 1305 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1306 value |= APIC_LVTT_TM_ONE_SHOT; 1307 la->lvt_timer_last = value; 1308 lapic_write32(LAPIC_LVT_TIMER, value); 1309 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1310 } 1311 1312 static void 1313 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count) 1314 { 1315 uint32_t value; 1316 1317 value = la->lvt_timer_base; 1318 value &= ~APIC_LVTT_TM; 1319 value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M; 
1320 la->lvt_timer_last = value; 1321 lapic_write32(LAPIC_LVT_TIMER, value); 1322 lapic_write32(LAPIC_ICR_TIMER, count); 1323 } 1324 1325 static void 1326 lapic_timer_periodic(struct lapic *la) 1327 { 1328 uint32_t value; 1329 1330 value = la->lvt_timer_base; 1331 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1332 value |= APIC_LVTT_TM_PERIODIC; 1333 la->lvt_timer_last = value; 1334 lapic_write32(LAPIC_LVT_TIMER, value); 1335 lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period); 1336 } 1337 1338 static void 1339 lapic_timer_deadline(struct lapic *la) 1340 { 1341 uint32_t value; 1342 1343 value = la->lvt_timer_base; 1344 value &= ~(APIC_LVTT_TM | APIC_LVT_M); 1345 value |= APIC_LVTT_TM_TSCDLT; 1346 if (value != la->lvt_timer_last) { 1347 la->lvt_timer_last = value; 1348 lapic_write32_nofence(LAPIC_LVT_TIMER, value); 1349 if (!x2apic_mode) 1350 mfence(); 1351 } 1352 wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc()); 1353 } 1354 1355 static void 1356 lapic_timer_stop(struct lapic *la) 1357 { 1358 uint32_t value; 1359 1360 if (la->la_timer_mode == LAT_MODE_DEADLINE) { 1361 wrmsr(MSR_TSC_DEADLINE, 0); 1362 mfence(); 1363 } else { 1364 value = la->lvt_timer_base; 1365 value &= ~APIC_LVTT_TM; 1366 value |= APIC_LVT_M; 1367 la->lvt_timer_last = value; 1368 lapic_write32(LAPIC_LVT_TIMER, value); 1369 } 1370 } 1371 1372 void 1373 lapic_handle_cmc(void) 1374 { 1375 1376 lapic_eoi(); 1377 cmc_intr(); 1378 } 1379 1380 /* 1381 * Called from the mca_init() to activate the CMC interrupt if this CPU is 1382 * responsible for monitoring any MC banks for CMC events. Since mca_init() 1383 * is called prior to lapic_setup() during boot, this just needs to unmask 1384 * this CPU's LVT_CMCI entry. 
1385 */ 1386 static void 1387 native_lapic_enable_cmc(void) 1388 { 1389 u_int apic_id; 1390 1391 #ifdef DEV_ATPIC 1392 if (!x2apic_mode && lapic_map == NULL) 1393 return; 1394 #endif 1395 apic_id = PCPU_GET(apic_id); 1396 KASSERT(lapics[apic_id].la_present, 1397 ("%s: missing APIC %u", __func__, apic_id)); 1398 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; 1399 lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; 1400 if (bootverbose) 1401 printf("lapic%u: CMCI unmasked\n", apic_id); 1402 } 1403 1404 static int 1405 native_lapic_enable_mca_elvt(void) 1406 { 1407 u_int apic_id; 1408 uint32_t value; 1409 int elvt_count; 1410 1411 #ifdef DEV_ATPIC 1412 if (lapic_map == NULL) 1413 return (-1); 1414 #endif 1415 1416 apic_id = PCPU_GET(apic_id); 1417 KASSERT(lapics[apic_id].la_present, 1418 ("%s: missing APIC %u", __func__, apic_id)); 1419 elvt_count = amd_read_elvt_count(); 1420 if (elvt_count <= APIC_ELVT_MCA) 1421 return (-1); 1422 1423 value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA); 1424 if ((value & APIC_LVT_M) == 0) { 1425 printf("AMD MCE Thresholding Extended LVT is already active\n"); 1426 return (-1); 1427 } 1428 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0; 1429 lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1; 1430 if (bootverbose) 1431 printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id); 1432 return (APIC_ELVT_MCA); 1433 } 1434 1435 void 1436 lapic_handle_error(void) 1437 { 1438 uint32_t esr; 1439 1440 /* 1441 * Read the contents of the error status register. Write to 1442 * the register first before reading from it to force the APIC 1443 * to update its value to indicate any errors that have 1444 * occurred since the previous write to the register. 
1445 */ 1446 lapic_write32(LAPIC_ESR, 0); 1447 esr = lapic_read32(LAPIC_ESR); 1448 1449 printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); 1450 lapic_eoi(); 1451 } 1452 1453 static u_int 1454 native_apic_cpuid(u_int apic_id) 1455 { 1456 #ifdef SMP 1457 return apic_cpuids[apic_id]; 1458 #else 1459 return 0; 1460 #endif 1461 } 1462 1463 /* Request a free IDT vector to be used by the specified IRQ. */ 1464 static u_int 1465 native_apic_alloc_vector(u_int apic_id, u_int irq) 1466 { 1467 u_int vector; 1468 1469 KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); 1470 1471 /* 1472 * Search for a free vector. Currently we just use a very simple 1473 * algorithm to find the first free vector. 1474 */ 1475 mtx_lock_spin(&icu_lock); 1476 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1477 if (lapics[apic_id].la_ioint_irqs[vector] != -1) 1478 continue; 1479 lapics[apic_id].la_ioint_irqs[vector] = irq; 1480 mtx_unlock_spin(&icu_lock); 1481 return (vector + APIC_IO_INTS); 1482 } 1483 mtx_unlock_spin(&icu_lock); 1484 return (0); 1485 } 1486 1487 /* 1488 * Request 'count' free contiguous IDT vectors to be used by 'count' 1489 * IRQs. 'count' must be a power of two and the vectors will be 1490 * aligned on a boundary of 'align'. If the request cannot be 1491 * satisfied, 0 is returned. 1492 */ 1493 static u_int 1494 native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) 1495 { 1496 u_int first, run, vector; 1497 1498 KASSERT(powerof2(count), ("bad count")); 1499 KASSERT(powerof2(align), ("bad align")); 1500 KASSERT(align >= count, ("align < count")); 1501 #ifdef INVARIANTS 1502 for (run = 0; run < count; run++) 1503 KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", 1504 irqs[run], run)); 1505 #endif 1506 1507 /* 1508 * Search for 'count' free vectors. As with apic_alloc_vector(), 1509 * this just uses a simple first fit algorithm. 
1510 */ 1511 run = 0; 1512 first = 0; 1513 mtx_lock_spin(&icu_lock); 1514 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1515 1516 /* Vector is in use, end run. */ 1517 if (lapics[apic_id].la_ioint_irqs[vector] != -1) { 1518 run = 0; 1519 first = 0; 1520 continue; 1521 } 1522 1523 /* Start a new run if run == 0 and vector is aligned. */ 1524 if (run == 0) { 1525 if ((vector & (align - 1)) != 0) 1526 continue; 1527 first = vector; 1528 } 1529 run++; 1530 1531 /* Keep looping if the run isn't long enough yet. */ 1532 if (run < count) 1533 continue; 1534 1535 /* Found a run, assign IRQs and return the first vector. */ 1536 for (vector = 0; vector < count; vector++) 1537 lapics[apic_id].la_ioint_irqs[first + vector] = 1538 irqs[vector]; 1539 mtx_unlock_spin(&icu_lock); 1540 return (first + APIC_IO_INTS); 1541 } 1542 mtx_unlock_spin(&icu_lock); 1543 printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); 1544 return (0); 1545 } 1546 1547 /* 1548 * Enable a vector for a particular apic_id. Since all lapics share idt 1549 * entries and ioint_handlers this enables the vector on all lapics. lapics 1550 * which do not have the vector configured would report spurious interrupts 1551 * should it fire. 
1552 */ 1553 static void 1554 native_apic_enable_vector(u_int apic_id, u_int vector) 1555 { 1556 1557 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1558 KASSERT(ioint_handlers[vector / 32] != NULL, 1559 ("No ISR handler for vector %u", vector)); 1560 #ifdef KDTRACE_HOOKS 1561 KASSERT(vector != IDT_DTRACE_RET, 1562 ("Attempt to overwrite DTrace entry")); 1563 #endif 1564 setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, 1565 GSEL_APIC); 1566 } 1567 1568 static void 1569 native_apic_disable_vector(u_int apic_id, u_int vector) 1570 { 1571 1572 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1573 #ifdef KDTRACE_HOOKS 1574 KASSERT(vector != IDT_DTRACE_RET, 1575 ("Attempt to overwrite DTrace entry")); 1576 #endif 1577 KASSERT(ioint_handlers[vector / 32] != NULL, 1578 ("No ISR handler for vector %u", vector)); 1579 #ifdef notyet 1580 /* 1581 * We can not currently clear the idt entry because other cpus 1582 * may have a valid vector at this offset. 1583 */ 1584 setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); 1585 #endif 1586 } 1587 1588 /* Release an APIC vector when it's no longer in use. */ 1589 static void 1590 native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) 1591 { 1592 struct thread *td; 1593 1594 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1595 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1596 ("Vector %u does not map to an IRQ line", vector)); 1597 KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); 1598 KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == 1599 irq, ("IRQ mismatch")); 1600 #ifdef KDTRACE_HOOKS 1601 KASSERT(vector != IDT_DTRACE_RET, 1602 ("Attempt to overwrite DTrace entry")); 1603 #endif 1604 1605 /* 1606 * Bind us to the cpu that owned the vector before freeing it so 1607 * we don't lose an interrupt delivery race. 
1608 */ 1609 td = curthread; 1610 if (!rebooting) { 1611 thread_lock(td); 1612 if (sched_is_bound(td)) 1613 panic("apic_free_vector: Thread already bound.\n"); 1614 sched_bind(td, apic_cpuid(apic_id)); 1615 thread_unlock(td); 1616 } 1617 mtx_lock_spin(&icu_lock); 1618 lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; 1619 mtx_unlock_spin(&icu_lock); 1620 if (!rebooting) { 1621 thread_lock(td); 1622 sched_unbind(td); 1623 thread_unlock(td); 1624 } 1625 } 1626 1627 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ 1628 static u_int 1629 apic_idt_to_irq(u_int apic_id, u_int vector) 1630 { 1631 int irq; 1632 1633 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1634 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1635 ("Vector %u does not map to an IRQ line", vector)); 1636 #ifdef KDTRACE_HOOKS 1637 KASSERT(vector != IDT_DTRACE_RET, 1638 ("Attempt to overwrite DTrace entry")); 1639 #endif 1640 irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; 1641 if (irq < 0) 1642 irq = 0; 1643 return (irq); 1644 } 1645 1646 #ifdef DDB 1647 /* 1648 * Dump data about APIC IDT vector mappings. 
1649 */ 1650 DB_SHOW_COMMAND(apic, db_show_apic) 1651 { 1652 struct intsrc *isrc; 1653 int i, verbose; 1654 u_int apic_id; 1655 u_int irq; 1656 1657 if (strcmp(modif, "vv") == 0) 1658 verbose = 2; 1659 else if (strcmp(modif, "v") == 0) 1660 verbose = 1; 1661 else 1662 verbose = 0; 1663 for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { 1664 if (lapics[apic_id].la_present == 0) 1665 continue; 1666 db_printf("Interrupts bound to lapic %u\n", apic_id); 1667 for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { 1668 irq = lapics[apic_id].la_ioint_irqs[i]; 1669 if (irq == -1 || irq == IRQ_SYSCALL) 1670 continue; 1671 #ifdef KDTRACE_HOOKS 1672 if (irq == IRQ_DTRACE_RET) 1673 continue; 1674 #endif 1675 #ifdef XENHVM 1676 if (irq == IRQ_EVTCHN) 1677 continue; 1678 #endif 1679 db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); 1680 if (irq == IRQ_TIMER) 1681 db_printf("lapic timer\n"); 1682 else if (irq < NUM_IO_INTS) { 1683 isrc = intr_lookup_source(irq); 1684 if (isrc == NULL || verbose == 0) 1685 db_printf("IRQ %u\n", irq); 1686 else 1687 db_dump_intr_event(isrc->is_event, 1688 verbose == 2); 1689 } else 1690 db_printf("IRQ %u ???\n", irq); 1691 } 1692 } 1693 } 1694 1695 static void 1696 dump_mask(const char *prefix, uint32_t v, int base) 1697 { 1698 int i, first; 1699 1700 first = 1; 1701 for (i = 0; i < 32; i++) 1702 if (v & (1 << i)) { 1703 if (first) { 1704 db_printf("%s:", prefix); 1705 first = 0; 1706 } 1707 db_printf(" %02x", base + i); 1708 } 1709 if (!first) 1710 db_printf("\n"); 1711 } 1712 1713 /* Show info from the lapic regs for this CPU. 
*/ 1714 DB_SHOW_COMMAND(lapic, db_show_lapic) 1715 { 1716 uint32_t v; 1717 1718 db_printf("lapic ID = %d\n", lapic_id()); 1719 v = lapic_read32(LAPIC_VERSION); 1720 db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, 1721 v & 0xf); 1722 db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); 1723 v = lapic_read32(LAPIC_SVR); 1724 db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, 1725 v & APIC_SVR_ENABLE ? "enabled" : "disabled"); 1726 db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); 1727 1728 #define dump_field(prefix, regn, index) \ 1729 dump_mask(__XSTRING(prefix ## index), \ 1730 lapic_read32(LAPIC_ ## regn ## index), \ 1731 index * 32) 1732 1733 db_printf("In-service Interrupts:\n"); 1734 dump_field(isr, ISR, 0); 1735 dump_field(isr, ISR, 1); 1736 dump_field(isr, ISR, 2); 1737 dump_field(isr, ISR, 3); 1738 dump_field(isr, ISR, 4); 1739 dump_field(isr, ISR, 5); 1740 dump_field(isr, ISR, 6); 1741 dump_field(isr, ISR, 7); 1742 1743 db_printf("TMR Interrupts:\n"); 1744 dump_field(tmr, TMR, 0); 1745 dump_field(tmr, TMR, 1); 1746 dump_field(tmr, TMR, 2); 1747 dump_field(tmr, TMR, 3); 1748 dump_field(tmr, TMR, 4); 1749 dump_field(tmr, TMR, 5); 1750 dump_field(tmr, TMR, 6); 1751 dump_field(tmr, TMR, 7); 1752 1753 db_printf("IRR Interrupts:\n"); 1754 dump_field(irr, IRR, 0); 1755 dump_field(irr, IRR, 1); 1756 dump_field(irr, IRR, 2); 1757 dump_field(irr, IRR, 3); 1758 dump_field(irr, IRR, 4); 1759 dump_field(irr, IRR, 5); 1760 dump_field(irr, IRR, 6); 1761 dump_field(irr, IRR, 7); 1762 1763 #undef dump_field 1764 } 1765 #endif 1766 1767 /* 1768 * APIC probing support code. This includes code to manage enumerators. 
1769 */ 1770 1771 static SLIST_HEAD(, apic_enumerator) enumerators = 1772 SLIST_HEAD_INITIALIZER(enumerators); 1773 static struct apic_enumerator *best_enum; 1774 1775 void 1776 apic_register_enumerator(struct apic_enumerator *enumerator) 1777 { 1778 #ifdef INVARIANTS 1779 struct apic_enumerator *apic_enum; 1780 1781 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1782 if (apic_enum == enumerator) 1783 panic("%s: Duplicate register of %s", __func__, 1784 enumerator->apic_name); 1785 } 1786 #endif 1787 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1788 } 1789 1790 /* 1791 * We have to look for CPU's very, very early because certain subsystems 1792 * want to know how many CPU's we have extremely early on in the boot 1793 * process. 1794 */ 1795 static void 1796 apic_init(void *dummy __unused) 1797 { 1798 struct apic_enumerator *enumerator; 1799 int retval, best; 1800 1801 /* We only support built in local APICs. */ 1802 if (!(cpu_feature & CPUID_APIC)) 1803 return; 1804 1805 /* Don't probe if APIC mode is disabled. */ 1806 if (resource_disabled("apic", 0)) 1807 return; 1808 1809 /* Probe all the enumerators to find the best match. */ 1810 best_enum = NULL; 1811 best = 0; 1812 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1813 retval = enumerator->apic_probe(); 1814 if (retval > 0) 1815 continue; 1816 if (best_enum == NULL || best < retval) { 1817 best_enum = enumerator; 1818 best = retval; 1819 } 1820 } 1821 if (best_enum == NULL) { 1822 if (bootverbose) 1823 printf("APIC: Could not find any APICs.\n"); 1824 #ifndef DEV_ATPIC 1825 panic("running without device atpic requires a local APIC"); 1826 #endif 1827 return; 1828 } 1829 1830 if (bootverbose) 1831 printf("APIC: Using the %s enumerator.\n", 1832 best_enum->apic_name); 1833 1834 #ifdef I686_CPU 1835 /* 1836 * To work around an errata, we disable the local APIC on some 1837 * CPUs during early startup. We need to turn the local APIC back 1838 * on on such CPUs now. 
1839 */ 1840 ppro_reenable_apic(); 1841 #endif 1842 1843 /* Probe the CPU's in the system. */ 1844 retval = best_enum->apic_probe_cpus(); 1845 if (retval != 0) 1846 printf("%s: Failed to probe CPUs: returned %d\n", 1847 best_enum->apic_name, retval); 1848 1849 } 1850 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1851 1852 /* 1853 * Setup the local APIC. We have to do this prior to starting up the APs 1854 * in the SMP case. 1855 */ 1856 static void 1857 apic_setup_local(void *dummy __unused) 1858 { 1859 int retval; 1860 1861 if (best_enum == NULL) 1862 return; 1863 1864 /* Initialize the local APIC. */ 1865 retval = best_enum->apic_setup_local(); 1866 if (retval != 0) 1867 printf("%s: Failed to setup the local APIC: returned %d\n", 1868 best_enum->apic_name, retval); 1869 } 1870 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1871 1872 /* 1873 * Setup the I/O APICs. 1874 */ 1875 static void 1876 apic_setup_io(void *dummy __unused) 1877 { 1878 int retval; 1879 1880 if (best_enum == NULL) 1881 return; 1882 1883 /* 1884 * Local APIC must be registered before other PICs and pseudo PICs 1885 * for proper suspend/resume order. 1886 */ 1887 intr_register_pic(&lapic_pic); 1888 1889 retval = best_enum->apic_setup_io(); 1890 if (retval != 0) 1891 printf("%s: Failed to setup I/O APICs: returned %d\n", 1892 best_enum->apic_name, retval); 1893 1894 /* 1895 * Finish setting up the local APIC on the BSP once we know 1896 * how to properly program the LINT pins. In particular, this 1897 * enables the EOI suppression mode, if LAPIC support it and 1898 * user did not disabled the mode. 1899 */ 1900 lapic_setup(1); 1901 if (bootverbose) 1902 lapic_dump("BSP"); 1903 1904 /* Enable the MSI "pic". */ 1905 init_ops.msi_init(); 1906 } 1907 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1908 1909 #ifdef SMP 1910 /* 1911 * Inter Processor Interrupt functions. 
The lapic_ipi_*() functions are 1912 * private to the MD code. The public interface for the rest of the 1913 * kernel is defined in mp_machdep.c. 1914 */ 1915 1916 /* 1917 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1918 * wait forever. 1919 */ 1920 static int 1921 native_lapic_ipi_wait(int delay) 1922 { 1923 uint64_t rx; 1924 1925 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1926 if (x2apic_mode) 1927 return (1); 1928 1929 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1930 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1931 APIC_DELSTAT_IDLE) 1932 return (1); 1933 ia32_pause(); 1934 } 1935 return (0); 1936 } 1937 1938 static void 1939 native_lapic_ipi_raw(register_t icrlo, u_int dest) 1940 { 1941 uint64_t icr; 1942 uint32_t vhi, vlo; 1943 register_t saveintr; 1944 1945 /* XXX: Need more sanity checking of icrlo? */ 1946 KASSERT(x2apic_mode || lapic_map != NULL, 1947 ("%s called too early", __func__)); 1948 KASSERT(x2apic_mode || 1949 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 1950 ("%s: invalid dest field", __func__)); 1951 KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, 1952 ("%s: reserved bits set in ICR LO register", __func__)); 1953 1954 /* Set destination in ICR HI register if it is being used. */ 1955 if (!x2apic_mode) { 1956 saveintr = intr_disable(); 1957 icr = lapic_read_icr(); 1958 } 1959 1960 if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { 1961 if (x2apic_mode) { 1962 vhi = dest; 1963 } else { 1964 vhi = icr >> 32; 1965 vhi &= ~APIC_ID_MASK; 1966 vhi |= dest << APIC_ID_SHIFT; 1967 } 1968 } else { 1969 vhi = 0; 1970 } 1971 1972 /* Program the contents of the IPI and dispatch it. 
*/ 1973 if (x2apic_mode) { 1974 vlo = icrlo; 1975 } else { 1976 vlo = icr; 1977 vlo &= APIC_ICRLO_RESV_MASK; 1978 vlo |= icrlo; 1979 } 1980 lapic_write_icr(vhi, vlo); 1981 if (!x2apic_mode) 1982 intr_restore(saveintr); 1983 } 1984 1985 #define BEFORE_SPIN 50000 1986 #ifdef DETECT_DEADLOCK 1987 #define AFTER_SPIN 50 1988 #endif 1989 1990 static void 1991 native_lapic_ipi_vectored(u_int vector, int dest) 1992 { 1993 register_t icrlo, destfield; 1994 1995 KASSERT((vector & ~APIC_VECTOR_MASK) == 0, 1996 ("%s: invalid vector %d", __func__, vector)); 1997 1998 icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; 1999 2000 /* 2001 * NMI IPIs are just fake vectors used to send a NMI. Use special rules 2002 * regarding NMIs if passed, otherwise specify the vector. 2003 */ 2004 if (vector >= IPI_NMI_FIRST) 2005 icrlo |= APIC_DELMODE_NMI; 2006 else 2007 icrlo |= vector | APIC_DELMODE_FIXED; 2008 destfield = 0; 2009 switch (dest) { 2010 case APIC_IPI_DEST_SELF: 2011 icrlo |= APIC_DEST_SELF; 2012 break; 2013 case APIC_IPI_DEST_ALL: 2014 icrlo |= APIC_DEST_ALLISELF; 2015 break; 2016 case APIC_IPI_DEST_OTHERS: 2017 icrlo |= APIC_DEST_ALLESELF; 2018 break; 2019 default: 2020 KASSERT(x2apic_mode || 2021 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2022 ("%s: invalid destination 0x%x", __func__, dest)); 2023 destfield = dest; 2024 } 2025 2026 /* Wait for an earlier IPI to finish. */ 2027 if (!lapic_ipi_wait(BEFORE_SPIN)) { 2028 if (panicstr != NULL) 2029 return; 2030 else 2031 panic("APIC: Previous IPI is stuck"); 2032 } 2033 2034 lapic_ipi_raw(icrlo, destfield); 2035 2036 #ifdef DETECT_DEADLOCK 2037 /* Wait for IPI to be delivered. */ 2038 if (!lapic_ipi_wait(AFTER_SPIN)) { 2039 #ifdef needsattention 2040 /* 2041 * XXX FIXME: 2042 * 2043 * The above function waits for the message to actually be 2044 * delivered. 
It breaks out after an arbitrary timeout 2045 * since the message should eventually be delivered (at 2046 * least in theory) and that if it wasn't we would catch 2047 * the failure with the check above when the next IPI is 2048 * sent. 2049 * 2050 * We could skip this wait entirely, EXCEPT it probably 2051 * protects us from other routines that assume that the 2052 * message was delivered and acted upon when this function 2053 * returns. 2054 */ 2055 printf("APIC: IPI might be stuck\n"); 2056 #else /* !needsattention */ 2057 /* Wait until mesage is sent without a timeout. */ 2058 while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) 2059 ia32_pause(); 2060 #endif /* needsattention */ 2061 } 2062 #endif /* DETECT_DEADLOCK */ 2063 } 2064 2065 #endif /* SMP */ 2066 2067 /* 2068 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally 2069 * visible. 2070 * 2071 * Consider the case where an IPI is generated immediately after allocation: 2072 * vector = lapic_ipi_alloc(ipifunc); 2073 * ipi_selected(other_cpus, vector); 2074 * 2075 * In xAPIC mode a write to ICR_LO has serializing semantics because the 2076 * APIC page is mapped as an uncached region. In x2APIC mode there is an 2077 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases 2078 * the IDT slot update is globally visible before the IPI is delivered. 
2079 */ 2080 static int 2081 native_lapic_ipi_alloc(inthand_t *ipifunc) 2082 { 2083 struct gate_descriptor *ip; 2084 long func; 2085 int idx, vector; 2086 2087 KASSERT(ipifunc != &IDTVEC(rsvd), ("invalid ipifunc %p", ipifunc)); 2088 2089 vector = -1; 2090 mtx_lock_spin(&icu_lock); 2091 for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { 2092 ip = &idt[idx]; 2093 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2094 if (func == (uintptr_t)&IDTVEC(rsvd)) { 2095 vector = idx; 2096 setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); 2097 break; 2098 } 2099 } 2100 mtx_unlock_spin(&icu_lock); 2101 return (vector); 2102 } 2103 2104 static void 2105 native_lapic_ipi_free(int vector) 2106 { 2107 struct gate_descriptor *ip; 2108 long func; 2109 2110 KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, 2111 ("%s: invalid vector %d", __func__, vector)); 2112 2113 mtx_lock_spin(&icu_lock); 2114 ip = &idt[vector]; 2115 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2116 KASSERT(func != (uintptr_t)&IDTVEC(rsvd), 2117 ("invalid idtfunc %#lx", func)); 2118 setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); 2119 mtx_unlock_spin(&icu_lock); 2120 } 2121