1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
24 * 25 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $ 26 */ 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/kernel.h> 31 #include <sys/ktr.h> 32 #include <sys/bus.h> 33 #include <sys/machintr.h> 34 #include <sys/malloc.h> 35 #include <sys/sysctl.h> 36 #include <machine/globaldata.h> 37 #include <machine/clock.h> 38 #include <machine/limits.h> 39 #include <machine/smp.h> 40 #include <machine/md_var.h> 41 #include <machine/pmap.h> 42 #include <machine/specialreg.h> 43 #include <machine_base/apic/lapic.h> 44 #include <machine_base/apic/ioapic.h> 45 #include <machine_base/apic/ioapic_abi.h> 46 #include <machine_base/apic/apicvar.h> 47 #include <machine_base/icu/icu_var.h> 48 #include <machine/segments.h> 49 #include <sys/spinlock2.h> 50 51 #include <machine/cputypes.h> 52 #include <machine/intr_machdep.h> 53 54 #if !defined(KTR_LAPIC) 55 #define KTR_LAPIC KTR_ALL 56 #endif 57 KTR_INFO_MASTER(lapic); 58 KTR_INFO(KTR_LAPIC, lapic, mem_eoi, 0, "mem_eoi"); 59 KTR_INFO(KTR_LAPIC, lapic, msr_eoi, 0, "msr_eoi"); 60 #define log_lapic(name) KTR_LOG(lapic_ ## name) 61 62 extern int naps; 63 64 volatile lapic_t *lapic_mem; 65 66 static void lapic_timer_calibrate(void); 67 static void lapic_timer_set_divisor(int); 68 static void lapic_timer_fixup_handler(void *); 69 static void lapic_timer_restart_handler(void *); 70 71 static int lapic_timer_c1e_test = -1; /* auto-detect */ 72 TUNABLE_INT("hw.lapic_timer_c1e_test", &lapic_timer_c1e_test); 73 74 static int lapic_timer_enable = 1; 75 TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable); 76 77 static int lapic_timer_tscdeadline = 1; 78 TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline); 79 80 static int lapic_calibrate_test = 0; 81 TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test); 82 83 static int lapic_calibrate_fast = 1; 84 TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast); 85 86 static void lapic_timer_tscdlt_reload(struct 
cputimer_intr *, sysclock_t); 87 static void lapic_mem_timer_intr_reload(struct cputimer_intr *, sysclock_t); 88 static void lapic_msr_timer_intr_reload(struct cputimer_intr *, sysclock_t); 89 static void lapic_timer_intr_enable(struct cputimer_intr *); 90 static void lapic_timer_intr_restart(struct cputimer_intr *); 91 static void lapic_timer_intr_pmfixup(struct cputimer_intr *); 92 93 static struct cputimer_intr lapic_cputimer_intr = { 94 .freq = 0, 95 .reload = lapic_mem_timer_intr_reload, 96 .enable = lapic_timer_intr_enable, 97 .config = cputimer_intr_default_config, 98 .restart = lapic_timer_intr_restart, 99 .pmfixup = lapic_timer_intr_pmfixup, 100 .initclock = cputimer_intr_default_initclock, 101 .pcpuhand = NULL, 102 .next = SLIST_ENTRY_INITIALIZER, 103 .name = "lapic", 104 .type = CPUTIMER_INTR_LAPIC, 105 .prio = CPUTIMER_INTR_PRIO_LAPIC, 106 .caps = CPUTIMER_INTR_CAP_NONE, 107 .priv = NULL 108 }; 109 110 static int lapic_timer_divisor_idx = -1; 111 static const uint32_t lapic_timer_divisors[] = { 112 APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 113 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128, APIC_TDCR_1 114 }; 115 #define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors)) 116 117 static int lapic_use_tscdeadline = 0; 118 119 /* 120 * APIC ID <-> CPU ID mapping structures. 121 */ 122 int cpu_id_to_apic_id[NAPICID]; 123 int apic_id_to_cpu_id[NAPICID]; 124 int lapic_enable = 1; 125 int lapic_usable = 0; 126 int x2apic_enable = 1; 127 128 SYSCTL_INT(_hw, OID_AUTO, x2apic_enable, CTLFLAG_RD, &x2apic_enable, 0, ""); 129 130 /* Separate cachelines for each cpu's info. 
 */
struct deadlines {
	uint64_t timestamp;		/* most recently armed TSC deadline */
	uint64_t downcount_time;	/* set_apic_timer() expiry, in TSC ticks */
	uint64_t padding[6];		/* pad entry out to its own cacheline */
};
static struct deadlines *tsc_deadlines = NULL;

static void lapic_mem_eoi(void);
static int lapic_mem_ipi(int dest_type, int vector, int delivery_mode);
static void lapic_mem_single_ipi(int cpu, int vector, int delivery_mode);

static void lapic_msr_eoi(void);
static int lapic_msr_ipi(int dest_type, int vector, int delivery_mode);
static void lapic_msr_single_ipi(int cpu, int vector, int delivery_mode);

/*
 * Ops vector; points at either the memory-mapped (xAPIC) or the
 * MSR-based (X2APIC) implementations, selected at configuration time.
 */
void (*lapic_eoi)(void);
int (*apic_ipi)(int dest_type, int vector, int delivery_mode);
void (*single_apic_ipi)(int cpu, int vector, int delivery_mode);

/*
 * Program the memory-mapped interrupt command register pair.
 * icr_hi (destination) must be written before icr_lo, since the
 * icr_lo write is what initiates delivery.  Reserved bits of
 * icr_lo are preserved.
 */
static __inline void
lapic_mem_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	uint32_t icr_lo, icr_hi;

	icr_hi = (LAPIC_MEM_READ(icr_hi) & ~APIC_ID_MASK) |
	    (apic_id << APIC_ID_SHIFT);
	icr_lo = (LAPIC_MEM_READ(icr_lo) & APIC_ICRLO_RESV_MASK) | icr_lo_val;

	LAPIC_MEM_WRITE(icr_hi, icr_hi);
	LAPIC_MEM_WRITE(icr_lo, icr_lo);
}

/*
 * X2APIC variant: destination and command are delivered atomically
 * in a single 64-bit MSR write (destination in the high 32 bits).
 */
static __inline void
lapic_msr_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	LAPIC_MSR_WRITE(MSR_X2APIC_ICR,
	    ((uint64_t)apic_id << 32) | ((uint64_t)icr_lo_val));
}

/*
 * Enable LAPIC, configure interrupts.
 *
 * Called once on the BSP (bsp == TRUE, also installs the shared IDT
 * vectors and calibrates the timer) and once on each AP.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			/* Per-cpu, cacheline-aligned deadline bookkeeping */
			tsc_deadlines =
				kmalloc(sizeof(struct deadlines) * (naps + 1),
					M_DEVBUF,
					M_WAITOK | M_ZERO | M_CACHEALIGN);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) | /* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = LAPIC_READ(lvt_lint0);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	LAPIC_WRITE(lvt_lint0, temp);

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = LAPIC_READ(lvt_lint1);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	LAPIC_WRITE(lvt_error, LAPIC_READ(lvt_error) | APIC_LVT_MASKED);
	LAPIC_WRITE(lvt_pcint, LAPIC_READ(lvt_pcint) | APIC_LVT_MASKED);

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = LAPIC_READ(lvt_timer);
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	LAPIC_WRITE(lvt_timer, timer);

	/*
	 * Set the Task Priority Register as needed.  At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = LAPIC_READ(tpr);
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	LAPIC_WRITE(tpr, temp);

	/*
	 * AMD specific setup
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD && lapic_mem != NULL &&
	    (LAPIC_MEM_READ(version) & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		/* Mask all extended LVT entries (IBS, MCA, DEI, SBI, ...) */
		ext_feat = LAPIC_MEM_READ(ext_feat);
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic_mem->ext_lvt) /
		    sizeof(lapic_mem->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = LAPIC_MEM_READ(ext_lvt[i].lvt);

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			/* No per-entry adjustment currently needed */
			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
					i, LAPIC_MEM_READ(ext_lvt[i].lvt));
				if (LAPIC_MEM_READ(ext_lvt[i].lvt) != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			LAPIC_MEM_WRITE(ext_lvt[i].lvt, lvt);
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = LAPIC_READ(svr);
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (LAPIC_READ(version) & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
		/* (future, on KVM auto-EOI must be disabled) */
		if (vmm_guest == VMM_GUEST_KVM)
			temp &= ~APIC_SVR_EOI_SUPP;
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	LAPIC_WRITE(svr, temp);

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* APs reuse the divisor the BSP calibrated */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}

/*
 * Program the LAPIC timer divide configuration register from the
 * divisor table.
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	LAPIC_WRITE(dcr_timer, lapic_timer_divisors[divisor_idx]);
}

/*
 * Arm the LAPIC timer in one-shot mode with the given initial count,
 * first clearing periodic/TSC-deadline mode bits in the LVT.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = LAPIC_READ(lvt_timer);
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	LAPIC_WRITE(lvt_timer, value);
	LAPIC_WRITE(icr_timer, count);
}

/*
 * Fast path: rearm the one-shot count only; the LVT mode bits are
 * assumed to already be set up.
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	LAPIC_WRITE(icr_timer, count);
}

/*
 * Arm a TSC deadline 'diff' TSC ticks in the future and record it in
 * this cpu's deadline slot.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

/*
 * Convert (value / scale) seconds into TSC ticks, rounding up.
 */
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val +=
(scale - 1);
	val /= scale;
	return val;
}

#define MAX_MEASURE_RETRIES	100

/*
 * Measure the LAPIC timer frequency against the TSC by counting down
 * from APIC_TIMER_MAX_COUNT over a DELAY(us) window.  Measurements
 * bracketed by unusually slow LAPIC register reads (more than twice
 * 'apic_delay_tsc') are retried up to MAX_MEASURE_RETRIES times.
 * Returns the estimated frequency in Hz, or 0 if the timer wrapped or
 * the current divisor cannot cover a 2 second span.
 */
static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = LAPIC_READ(ccr_timer);
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = LAPIC_READ(ccr_timer);
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}

/*
 * Same measurement, but referenced against sys_cputimer instead of
 * the TSC.  Returns estimated LAPIC timer frequency in Hz, or 0 on a
 * wrap / zero-length window.
 */
static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end;
	uint32_t beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = LAPIC_READ(ccr_timer);
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = LAPIC_READ(ccr_timer);
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = (uint32_t)(beginning - finish);
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = muldivu64(value, sys_cputimer->freq, end);

	return value;
}

/*
 * Determine lapic_cputimer_intr.freq.  In TSC-deadline mode the TSC
 * frequency is used directly; otherwise walk the divisor table until a
 * divisor is found whose range covers ~2 seconds, calibrating against
 * either the TSC (fast path) or sys_cputimer.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_cputimer_intr.freq = tsc_frequency;
		kprintf(
		    "lapic: TSC Deadline Mode: frequency %lu Hz\n",
		    lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		uint32_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = LAPIC_READ(ccr_timer);

		/* Estimate the latency of a single ccr_timer read */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = LAPIC_READ(ccr_timer);
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least some
		 * virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %lu Hz\n",
		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	/* Optional consistency check over longer measurement windows */
	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}

/*
 * cputimer_intr reload for TSC-deadline mode.  Only rewrites the
 * deadline MSR when the new deadline is earlier than the one already
 * armed (or none is armed), to avoid pushing a pending expiry out.
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	/*
	 * Set maximum deadline to 60 seconds
	 */
	if (reload > sys_cputimer->freq * 60)
		reload = sys_cputimer->freq * 60;
	diff = muldivu64(reload, tsc_frequency, sys_cputimer->freq);
	if (diff < 4)
		diff = 4;
	/* Fence before rdtsc(); vendor-dependent fence selection */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

/*
 * cputimer_intr reload for one-shot mode via the memory-mapped LAPIC.
 * Only shortens an already-running countdown, never lengthens it.
 */
static void
lapic_mem_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	if ((ssysclock_t)reload < 0)
		reload = 1;
	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);
	if (reload < 2)
		reload = 2;
	if (reload > 0xFFFFFFFF)
		reload = 0xFFFFFFFF;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MEM_READ(ccr_timer))
			LAPIC_MEM_WRITE(icr_timer, (uint32_t)reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MEM_WRITE(icr_timer, (uint32_t)reload);
	}
}

/*
 * Same as above, X2APIC (MSR) flavor.
 */
static void
lapic_msr_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	if ((ssysclock_t)reload < 0)
		reload = 1;
	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);
	if (reload < 2)
		reload = 2;
	if (reload > 0xFFFFFFFF)
		reload = 0xFFFFFFFF;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER))
			LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, (uint32_t)reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, (uint32_t)reload);
	}
}

/*
 * Unmask the LAPIC timer LVT (selecting TSC-deadline mode if in use)
 * and apply the AMD C1E fixup if applicable.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = LAPIC_READ(lvt_timer);
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	LAPIC_WRITE(lvt_timer, timer);
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Detect and disable AMD C1E, which stops the LAPIC timer.  If C1E
 * was cleared, the timer is kick-started and *arg (if non-NULL) is
 * set to 1 to tell the caller a restart already happened.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		int c1e_test = lapic_timer_c1e_test;

		if (c1e_test < 0) {
			if (vmm_guest == VMM_GUEST_NONE) {
				c1e_test = 1;
			} else {
				/*
				 * Don't do this C1E testing and adjustment
				 * on virtual machines, the best case for
				 * accessing this MSR is a NOOP; the worst
				 * cases could be pretty nasty, e.g. crash.
				 */
				c1e_test = 0;
			}
		}

		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family.  This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000 &&
		    c1e_test) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu restart handler: apply the C1E fixup, then kick-start the
 * timer if the fixup did not already do so.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.
So once ACPICA is attached, we try
 * to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
			    lapic_timer_fixup_handler, NULL);
}

/*
 * Restart the LAPIC timer on all active cpus (cputimer_intr restart
 * hook).
 */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		LAPIC_READ(lvt_lint0), LAPIC_READ(lvt_lint1), LAPIC_READ(tpr),
		LAPIC_READ(svr));
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Wait for a previously issued IPI to clear its delivery-pending
 * status.  Complains once a second and panics after ~30 seconds of
 * no progress.
 */
static __inline void
lapic_mem_icr_unpend(const char *func)
{
	if (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
		int64_t tsc;
		int loops = 1;

		tsc = rdtsc();
		while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				tsc = rdtsc();
				if (++loops > 30) {
					panic("%s: cpu%d apic stalled",
					    func, mycpuid);
				} else {
					kprintf("%s: cpu%d apic stalled\n",
					    func, mycpuid);
				}
			}
		}
	}
}

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
static int
lapic_mem_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * X2APIC flavor of the above; no unpend wait is needed since the MSR
 * interface has no delivery-pending status to poll.
 */
static int
lapic_msr_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_msr_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * Interrupts must be hard-disabled by caller
 */
static void
lapic_mem_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * X2APIC flavor; interrupts must be hard-disabled by caller.
 */
static void
lapic_msr_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_msr_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	/* Round the microsecond conversion up */
	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}


/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);	/* round up */
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = LAPIC_READ(ccr_timer);
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);	/* round up */
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

/*
 * Return the first unused APIC ID at or above 'start', or NAPICID if
 * every ID is taken.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/*
 * Map the memory-mapped LAPIC register page (uncacheable).
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic_mem = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

/*
 * Switch this cpu into X2APIC mode; on the BSP also repoint the
 * EOI/IPI/timer ops at the MSR-based implementations.
 */
void
lapic_x2apic_enter(boolean_t bsp)
{
	uint64_t apic_base;

	KASSERT(x2apic_enable, ("X2APIC mode is not enabled"));

	/*
	 * X2APIC mode is requested, if it has not been enabled by the BIOS,
	 * enable it now.
	 */
	apic_base = rdmsr(MSR_APICBASE);
	if ((apic_base & APICBASE_X2APIC) == 0) {
		wrmsr(MSR_APICBASE,
		    apic_base | APICBASE_X2APIC | APICBASE_ENABLED);
	}
	if (bsp) {
		lapic_eoi = lapic_msr_eoi;
		apic_ipi = lapic_msr_ipi;
		single_apic_ipi = lapic_msr_single_ipi;
		lapic_cputimer_intr.reload = lapic_msr_timer_intr_reload;
	}
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe and enumerate LAPICs via the registered enumerators, decide
 * xAPIC vs X2APIC mode, and clamp the usable AP count.  Returns 0 on
 * success or ENXIO if no LAPIC could be found/enumerated.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	uint64_t apic_base;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	/* Default to the memory-mapped (xAPIC) ops */
	lapic_eoi = lapic_mem_eoi;
	apic_ipi = lapic_mem_ipi;
	single_apic_ipi = lapic_mem_single_ipi;

	TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable);
	if (x2apic_enable < 0)
		x2apic_enable = 1;
	if ((cpu_feature2 & CPUID2_X2APIC) == 0) {
		/* X2APIC is not supported. */
		x2apic_enable = 0;
	} else {
		/*
		 * If the BIOS enabled the X2APIC mode, then we would stick
		 * with the X2APIC mode.
		 */
		apic_base = rdmsr(MSR_APICBASE);
		if (apic_base & APICBASE_X2APIC) {
			if (x2apic_enable == 0)
				kprintf("LAPIC: BIOS enabled X2APIC mode, force on\n");
			else
				kprintf("LAPIC: BIOS enabled X2APIC mode\n");
			x2apic_enable = 1;
		}
	}
	/*
	 * NOTE(review): this re-checks APICBASE_X2APIC already tested
	 * above and only adds a kprintf; looks redundant - confirm
	 * against upstream before removing.
	 */
	if (cpu_feature2 & CPUID2_X2APIC) {
		apic_base = rdmsr(MSR_APICBASE);
		if (apic_base & APICBASE_X2APIC)
			kprintf("LAPIC: BIOS already enabled X2APIC mode\n");
	}

	if (x2apic_enable) {
		/*
		 * Enter X2APIC mode.
		 */
		kprintf("LAPIC: enter X2APIC mode\n");
		lapic_x2apic_enter(TRUE);
	}

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	/* Enumerators are kept sorted by priority; first probe wins */
	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	/* LAPIC is usable now. */
	lapic_usable = 1;

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Register a LAPIC enumerator, keeping the list sorted by descending
 * priority (lower lapic_prio sorts later).
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/*
 * Record a bidirectional CPU ID <-> APIC ID mapping.
 */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * With no I/O APIC, unmask LINT0 (ExtINT from the 8259) and mask
 * LINT1 on the BSP.
 */
void
lapic_fixup_noioapic(void)
{
	u_int temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = LAPIC_READ(lvt_lint0);
	temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint0, temp);

	temp = LAPIC_READ(lvt_lint1);
	temp |= APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);
}

static void
lapic_mem_eoi(void)
{
	log_lapic(mem_eoi);
	LAPIC_MEM_WRITE(eoi, 0);
}

static void
lapic_msr_eoi(void)
{
	log_lapic(msr_eoi);
	LAPIC_MSR_WRITE(MSR_X2APIC_EOI, 0);
}

/*
 * Issue an ICR command and spin until delivery is no longer pending.
 */
static void
lapic_mem_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	lapic_mem_icr_set(apic_id, icr_lo_val);
	while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND)
		/* spin */;
}

/*
 * Mode-independent synchronous ICR write (X2APIC needs no wait).
 */
void
lapic_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	if (x2apic_enable)
		lapic_msr_icr_set(apic_id, icr_lo_val);
	else
		lapic_mem_seticr_sync(apic_id, icr_lo_val);
}

/*
 * Boot-time hook: configure the LAPIC subsystem, falling back to the
 * ICU (8259) path if the LAPIC cannot be configured.
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}
	if (!lapic_enable)
		x2apic_enable = 0;

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		kprintf("IOAPIC disabled - lapic was not enabled\n");
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);