1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
24 * 25 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $ 26 */ 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/kernel.h> 31 #include <sys/ktr.h> 32 #include <sys/bus.h> 33 #include <sys/machintr.h> 34 #include <sys/sysctl.h> 35 #include <machine/globaldata.h> 36 #include <machine/clock.h> 37 #include <machine/limits.h> 38 #include <machine/smp.h> 39 #include <machine/md_var.h> 40 #include <machine/pmap.h> 41 #include <machine/specialreg.h> 42 #include <machine_base/apic/lapic.h> 43 #include <machine_base/apic/ioapic.h> 44 #include <machine_base/apic/ioapic_abi.h> 45 #include <machine_base/apic/apicvar.h> 46 #include <machine_base/icu/icu_var.h> 47 #include <machine/segments.h> 48 #include <sys/spinlock2.h> 49 50 #include <machine/cputypes.h> 51 #include <machine/intr_machdep.h> 52 53 #if !defined(KTR_LAPIC) 54 #define KTR_LAPIC KTR_ALL 55 #endif 56 KTR_INFO_MASTER(lapic); 57 KTR_INFO(KTR_LAPIC, lapic, mem_eoi, 0, "mem_eoi"); 58 KTR_INFO(KTR_LAPIC, lapic, msr_eoi, 0, "msr_eoi"); 59 #define log_lapic(name) KTR_LOG(lapic_ ## name) 60 61 extern int naps; 62 63 volatile lapic_t *lapic_mem; 64 65 static void lapic_timer_calibrate(void); 66 static void lapic_timer_set_divisor(int); 67 static void lapic_timer_fixup_handler(void *); 68 static void lapic_timer_restart_handler(void *); 69 70 static int lapic_timer_c1e_test = -1; /* auto-detect */ 71 TUNABLE_INT("hw.lapic_timer_c1e_test", &lapic_timer_c1e_test); 72 73 static int lapic_timer_enable = 1; 74 TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable); 75 76 static int lapic_timer_tscdeadline = 1; 77 TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline); 78 79 static int lapic_calibrate_test = 0; 80 TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test); 81 82 static int lapic_calibrate_fast = 1; 83 TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast); 84 85 static void lapic_timer_tscdlt_reload(struct cputimer_intr *, 
sysclock_t); 86 static void lapic_mem_timer_intr_reload(struct cputimer_intr *, sysclock_t); 87 static void lapic_msr_timer_intr_reload(struct cputimer_intr *, sysclock_t); 88 static void lapic_timer_intr_enable(struct cputimer_intr *); 89 static void lapic_timer_intr_restart(struct cputimer_intr *); 90 static void lapic_timer_intr_pmfixup(struct cputimer_intr *); 91 92 static struct cputimer_intr lapic_cputimer_intr = { 93 .freq = 0, 94 .reload = lapic_mem_timer_intr_reload, 95 .enable = lapic_timer_intr_enable, 96 .config = cputimer_intr_default_config, 97 .restart = lapic_timer_intr_restart, 98 .pmfixup = lapic_timer_intr_pmfixup, 99 .initclock = cputimer_intr_default_initclock, 100 .pcpuhand = NULL, 101 .next = SLIST_ENTRY_INITIALIZER, 102 .name = "lapic", 103 .type = CPUTIMER_INTR_LAPIC, 104 .prio = CPUTIMER_INTR_PRIO_LAPIC, 105 .caps = CPUTIMER_INTR_CAP_NONE, 106 .priv = NULL 107 }; 108 109 static int lapic_timer_divisor_idx = -1; 110 static const uint32_t lapic_timer_divisors[] = { 111 APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 112 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128, APIC_TDCR_1 113 }; 114 #define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors)) 115 116 static int lapic_use_tscdeadline = 0; 117 /* The raw TSC frequency might not fit into a sysclock_t value. */ 118 static int lapic_timer_tscfreq_shift; 119 120 /* 121 * APIC ID <-> CPU ID mapping structures. 122 */ 123 int cpu_id_to_apic_id[NAPICID]; 124 int apic_id_to_cpu_id[NAPICID]; 125 int lapic_enable = 1; 126 int lapic_usable = 0; 127 int x2apic_enable = 1; 128 129 SYSCTL_INT(_hw, OID_AUTO, x2apic_enable, CTLFLAG_RD, &x2apic_enable, 0, ""); 130 131 /* Separate cachelines for each cpu's info. 
*/ 132 struct deadlines { 133 uint64_t timestamp; 134 uint64_t downcount_time; 135 uint64_t padding[6]; 136 }; 137 struct deadlines *tsc_deadlines = NULL; 138 139 static void lapic_mem_eoi(void); 140 static int lapic_mem_ipi(int dest_type, int vector, int delivery_mode); 141 static void lapic_mem_single_ipi(int cpu, int vector, int delivery_mode); 142 143 static void lapic_msr_eoi(void); 144 static int lapic_msr_ipi(int dest_type, int vector, int delivery_mode); 145 static void lapic_msr_single_ipi(int cpu, int vector, int delivery_mode); 146 147 void (*lapic_eoi)(void); 148 int (*apic_ipi)(int dest_type, int vector, int delivery_mode); 149 void (*single_apic_ipi)(int cpu, int vector, int delivery_mode); 150 151 static __inline void 152 lapic_mem_icr_set(uint32_t apic_id, uint32_t icr_lo_val) 153 { 154 uint32_t icr_lo, icr_hi; 155 156 icr_hi = (LAPIC_MEM_READ(icr_hi) & ~APIC_ID_MASK) | 157 (apic_id << APIC_ID_SHIFT); 158 icr_lo = (LAPIC_MEM_READ(icr_lo) & APIC_ICRLO_RESV_MASK) | icr_lo_val; 159 160 LAPIC_MEM_WRITE(icr_hi, icr_hi); 161 LAPIC_MEM_WRITE(icr_lo, icr_lo); 162 } 163 164 static __inline void 165 lapic_msr_icr_set(uint32_t apic_id, uint32_t icr_lo_val) 166 { 167 LAPIC_MSR_WRITE(MSR_X2APIC_ICR, 168 ((uint64_t)apic_id << 32) | ((uint64_t)icr_lo_val)); 169 } 170 171 /* 172 * Enable LAPIC, configure interrupts. 173 */ 174 void 175 lapic_init(boolean_t bsp) 176 { 177 uint32_t timer; 178 u_int temp; 179 180 if (bsp) { 181 /* Decide whether we want to use TSC Deadline mode. */ 182 if (lapic_timer_tscdeadline != 0 && 183 (cpu_feature2 & CPUID2_TSCDLT) && 184 tsc_invariant && tsc_frequency != 0) { 185 lapic_use_tscdeadline = 1; 186 tsc_deadlines = 187 kmalloc(sizeof(struct deadlines) * (naps + 1), 188 M_DEVBUF, 189 M_WAITOK | M_ZERO | M_CACHEALIGN); 190 } 191 } 192 193 /* 194 * Install vectors 195 * 196 * Since IDT is shared between BSP and APs, these vectors 197 * only need to be installed once; we do it on BSP. 
198 */ 199 if (bsp) { 200 if (cpu_vendor_id == CPU_VENDOR_AMD && 201 CPUID_TO_FAMILY(cpu_id) >= 0x0f && 202 CPUID_TO_FAMILY(cpu_id) < 0x17) { /* XXX */ 203 uint32_t tcr; 204 205 /* 206 * Set the LINTEN bit in the HyperTransport 207 * Transaction Control Register. 208 * 209 * This will cause EXTINT and NMI interrupts 210 * routed over the hypertransport bus to be 211 * fed into the LAPIC LINT0/LINT1. If the bit 212 * isn't set, the interrupts will go to the 213 * general cpu INTR/NMI pins. On a dual-core 214 * cpu the interrupt winds up going to BOTH cpus. 215 * The first cpu that does the interrupt ack 216 * cycle will get the correct interrupt. The 217 * second cpu that does it will get a spurious 218 * interrupt vector (typically IRQ 7). 219 */ 220 outl(0x0cf8, 221 (1 << 31) | /* enable */ 222 (0 << 16) | /* bus */ 223 (0x18 << 11) | /* dev (cpu + 0x18) */ 224 (0 << 8) | /* func */ 225 0x68 /* reg */ 226 ); 227 tcr = inl(0xcfc); 228 if ((tcr & 0x00010000) == 0) { 229 kprintf("LAPIC: AMD LINTEN on\n"); 230 outl(0xcfc, tcr|0x00010000); 231 } 232 outl(0x0cf8, 0); 233 } 234 235 /* Install a 'Spurious INTerrupt' vector */ 236 setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint, 237 SDT_SYSIGT, SEL_KPL, 0); 238 239 /* Install a timer vector */ 240 setidt_global(XTIMER_OFFSET, Xtimer, 241 SDT_SYSIGT, SEL_KPL, 0); 242 243 /* Install an inter-CPU IPI for TLB invalidation */ 244 setidt_global(XINVLTLB_OFFSET, Xinvltlb, 245 SDT_SYSIGT, SEL_KPL, 0); 246 247 /* Install an inter-CPU IPI for IPIQ messaging */ 248 setidt_global(XIPIQ_OFFSET, Xipiq, 249 SDT_SYSIGT, SEL_KPL, 0); 250 251 /* Install an inter-CPU IPI for CPU stop/restart */ 252 setidt_global(XCPUSTOP_OFFSET, Xcpustop, 253 SDT_SYSIGT, SEL_KPL, 0); 254 255 /* Install an inter-CPU IPI for TLB invalidation */ 256 setidt_global(XSNIFF_OFFSET, Xsniff, 257 SDT_SYSIGT, SEL_KPL, 0); 258 } 259 260 /* 261 * Setup LINT0 as ExtINT on the BSP. This is theoretically an 262 * aggregate interrupt input from the 8259. 
The INTA cycle 263 * will be routed to the external controller (the 8259) which 264 * is expected to supply the vector. 265 * 266 * Must be setup edge triggered, active high. 267 * 268 * Disable LINT0 on BSP, if I/O APIC is enabled. 269 * 270 * Disable LINT0 on the APs. It doesn't matter what delivery 271 * mode we use because we leave it masked. 272 */ 273 temp = LAPIC_READ(lvt_lint0); 274 temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 275 APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); 276 if (bsp) { 277 temp |= APIC_LVT_DM_EXTINT; 278 if (ioapic_enable) 279 temp |= APIC_LVT_MASKED; 280 } else { 281 temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED; 282 } 283 LAPIC_WRITE(lvt_lint0, temp); 284 285 /* 286 * Setup LINT1 as NMI. 287 * 288 * Must be setup edge trigger, active high. 289 * 290 * Enable LINT1 on BSP, if I/O APIC is enabled. 291 * 292 * Disable LINT1 on the APs. 293 */ 294 temp = LAPIC_READ(lvt_lint1); 295 temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 296 APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); 297 temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI; 298 if (bsp && ioapic_enable) 299 temp &= ~APIC_LVT_MASKED; 300 LAPIC_WRITE(lvt_lint1, temp); 301 302 /* 303 * Mask the LAPIC error interrupt, LAPIC performance counter 304 * interrupt. 305 */ 306 LAPIC_WRITE(lvt_error, LAPIC_READ(lvt_error) | APIC_LVT_MASKED); 307 LAPIC_WRITE(lvt_pcint, LAPIC_READ(lvt_pcint) | APIC_LVT_MASKED); 308 309 /* 310 * Set LAPIC timer vector and mask the LAPIC timer interrupt. 311 */ 312 timer = LAPIC_READ(lvt_timer); 313 timer &= ~APIC_LVTT_VECTOR; 314 timer |= XTIMER_OFFSET; 315 timer |= APIC_LVTT_MASKED; 316 LAPIC_WRITE(lvt_timer, timer); 317 318 /* 319 * Set the Task Priority Register as needed. At the moment allow 320 * interrupts on all cpus (the APs will remain CLId until they are 321 * ready to deal). 
322 */ 323 temp = LAPIC_READ(tpr); 324 temp &= ~APIC_TPR_PRIO; /* clear priority field */ 325 LAPIC_WRITE(tpr, temp); 326 327 /* 328 * AMD specific setup 329 */ 330 if (cpu_vendor_id == CPU_VENDOR_AMD && lapic_mem != NULL && 331 (LAPIC_MEM_READ(version) & APIC_VER_AMD_EXT_SPACE)) { 332 uint32_t ext_feat; 333 uint32_t count; 334 uint32_t max_count; 335 uint32_t lvt; 336 uint32_t i; 337 338 ext_feat = LAPIC_MEM_READ(ext_feat); 339 count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT; 340 max_count = sizeof(lapic_mem->ext_lvt) / 341 sizeof(lapic_mem->ext_lvt[0]); 342 if (count > max_count) 343 count = max_count; 344 for (i = 0; i < count; ++i) { 345 lvt = LAPIC_MEM_READ(ext_lvt[i].lvt); 346 347 lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK | 348 APIC_LVT_DM_MASK | APIC_LVT_MASKED); 349 lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED; 350 351 switch(i) { 352 case APIC_EXTLVT_IBS: 353 break; 354 case APIC_EXTLVT_MCA: 355 break; 356 case APIC_EXTLVT_DEI: 357 break; 358 case APIC_EXTLVT_SBI: 359 break; 360 default: 361 break; 362 } 363 if (bsp) { 364 kprintf(" LAPIC AMD elvt%d: 0x%08x", 365 i, LAPIC_MEM_READ(ext_lvt[i].lvt)); 366 if (LAPIC_MEM_READ(ext_lvt[i].lvt) != lvt) 367 kprintf(" -> 0x%08x", lvt); 368 kprintf("\n"); 369 } 370 LAPIC_MEM_WRITE(ext_lvt[i].lvt, lvt); 371 } 372 } 373 374 /* 375 * Enable the LAPIC 376 */ 377 temp = LAPIC_READ(svr); 378 temp |= APIC_SVR_ENABLE; /* enable the LAPIC */ 379 temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */ 380 381 if (LAPIC_READ(version) & APIC_VER_EOI_SUPP) { 382 if (temp & APIC_SVR_EOI_SUPP) { 383 temp &= ~APIC_SVR_EOI_SUPP; 384 if (bsp) 385 kprintf(" LAPIC disabling EOI supp\n"); 386 } 387 } 388 389 /* 390 * Set the spurious interrupt vector. The low 4 bits of the vector 391 * must be 1111. 
392 */ 393 if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F) 394 panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET); 395 temp &= ~APIC_SVR_VECTOR; 396 temp |= XSPURIOUSINT_OFFSET; 397 398 LAPIC_WRITE(svr, temp); 399 400 /* 401 * Pump out a few EOIs to clean out interrupts that got through 402 * before we were able to set the TPR. 403 */ 404 LAPIC_WRITE(eoi, 0); 405 LAPIC_WRITE(eoi, 0); 406 LAPIC_WRITE(eoi, 0); 407 408 if (bsp) { 409 lapic_timer_calibrate(); 410 if (lapic_timer_enable) { 411 if (cpu_thermal_feature & CPUID_THERMAL_ARAT) { 412 /* 413 * Local APIC timer will not stop 414 * in deep C-state. 415 */ 416 lapic_cputimer_intr.caps |= 417 CPUTIMER_INTR_CAP_PS; 418 } 419 if (lapic_use_tscdeadline) { 420 lapic_cputimer_intr.reload = 421 lapic_timer_tscdlt_reload; 422 } 423 cputimer_intr_register(&lapic_cputimer_intr); 424 cputimer_intr_select(&lapic_cputimer_intr, 0); 425 } 426 } else if (!lapic_use_tscdeadline) { 427 lapic_timer_set_divisor(lapic_timer_divisor_idx); 428 } 429 430 if (bootverbose) 431 apic_dump("apic_initialize()"); 432 } 433 434 static void 435 lapic_timer_set_divisor(int divisor_idx) 436 { 437 KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS); 438 LAPIC_WRITE(dcr_timer, lapic_timer_divisors[divisor_idx]); 439 } 440 441 static void 442 lapic_timer_oneshot(u_int count) 443 { 444 uint32_t value; 445 446 value = LAPIC_READ(lvt_timer); 447 value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT); 448 LAPIC_WRITE(lvt_timer, value); 449 LAPIC_WRITE(icr_timer, count); 450 } 451 452 static void 453 lapic_timer_oneshot_quick(u_int count) 454 { 455 LAPIC_WRITE(icr_timer, count); 456 } 457 458 static void 459 lapic_timer_tscdeadline_quick(uint64_t diff) 460 { 461 uint64_t val = rdtsc() + diff; 462 463 wrmsr(MSR_TSC_DEADLINE, val); 464 tsc_deadlines[mycpuid].timestamp = val; 465 } 466 467 static uint64_t 468 lapic_scale_to_tsc(unsigned value, unsigned scale) 469 { 470 uint64_t val; 471 472 val = value; 473 val *= tsc_frequency; 474 val += 
(scale - 1); 475 val /= scale; 476 return val; 477 } 478 479 #define MAX_MEASURE_RETRIES 100 480 481 static u_int64_t 482 do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc) 483 { 484 u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2; 485 u_int64_t diff, count; 486 u_int64_t a; 487 u_int32_t start, end; 488 int retries1 = 0, retries2 = 0; 489 490 retry1: 491 lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT); 492 old_tsc1 = rdtsc_ordered(); 493 start = LAPIC_READ(ccr_timer); 494 old_tsc2 = rdtsc_ordered(); 495 if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES && 496 old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) { 497 retries1++; 498 goto retry1; 499 } 500 DELAY(us); 501 retry2: 502 new_tsc1 = rdtsc_ordered(); 503 end = LAPIC_READ(ccr_timer); 504 new_tsc2 = rdtsc_ordered(); 505 if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES && 506 new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) { 507 retries2++; 508 goto retry2; 509 } 510 if (end == 0) 511 return 0; 512 513 count = start - end; 514 515 /* Make sure the lapic can count for up to 2s */ 516 a = (unsigned)APIC_TIMER_MAX_COUNT; 517 if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us) 518 return 0; 519 520 if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) { 521 kprintf("%s: retries1=%d retries2=%d\n", 522 __func__, retries1, retries2); 523 } 524 525 diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2); 526 /* XXX First estimate if the total TSC diff value makes sense */ 527 /* This will almost overflow, but only almost :) */ 528 count = (2 * count * tsc_frequency) / diff; 529 530 return count; 531 } 532 533 static uint64_t 534 do_cputimer_calibration(u_int us) 535 { 536 sysclock_t value; 537 sysclock_t start, end, beginning, finish; 538 539 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); 540 beginning = LAPIC_READ(ccr_timer); 541 start = sys_cputimer->count(); 542 DELAY(us); 543 end = sys_cputimer->count(); 544 finish = LAPIC_READ(ccr_timer); 545 if (finish == 0) 546 return 0; 547 /* value is the LAPIC 
timer difference. */ 548 value = beginning - finish; 549 /* end is the sys_cputimer difference. */ 550 end -= start; 551 if (end == 0) 552 return 0; 553 value = ((uint64_t)value * sys_cputimer->freq) / end; 554 return value; 555 } 556 557 static void 558 lapic_timer_calibrate(void) 559 { 560 sysclock_t value; 561 u_int64_t apic_delay_tsc = 0; 562 int use_tsc_calibration = 0; 563 564 /* No need to calibrate lapic_timer, if we will use TSC Deadline mode */ 565 if (lapic_use_tscdeadline) { 566 lapic_timer_tscfreq_shift = 0; 567 while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX) 568 lapic_timer_tscfreq_shift++; 569 lapic_cputimer_intr.freq = 570 tsc_frequency >> lapic_timer_tscfreq_shift; 571 kprintf( 572 "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n", 573 lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq); 574 return; 575 } 576 577 /* 578 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in 579 * a virtual machine the frequency may get changed by the host. 
580 */ 581 if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast) 582 use_tsc_calibration = 1; 583 584 if (use_tsc_calibration) { 585 u_int64_t min_apic_tsc = 0, max_apic_tsc = 0; 586 u_int64_t old_tsc, new_tsc; 587 sysclock_t val; 588 int i; 589 590 /* warm up */ 591 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); 592 for (i = 0; i < 10; i++) 593 val = LAPIC_READ(ccr_timer); 594 595 for (i = 0; i < 100; i++) { 596 old_tsc = rdtsc_ordered(); 597 val = LAPIC_READ(ccr_timer); 598 new_tsc = rdtsc_ordered(); 599 new_tsc -= old_tsc; 600 apic_delay_tsc += new_tsc; 601 if (min_apic_tsc == 0 || 602 min_apic_tsc > new_tsc) { 603 min_apic_tsc = new_tsc; 604 } 605 if (max_apic_tsc < new_tsc) 606 max_apic_tsc = new_tsc; 607 } 608 apic_delay_tsc /= 100; 609 kprintf( 610 "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n", 611 apic_delay_tsc, min_apic_tsc, max_apic_tsc); 612 apic_delay_tsc = min_apic_tsc; 613 } 614 615 if (!use_tsc_calibration) { 616 int i; 617 618 /* 619 * Do some exercising of the lapic timer access. This improves 620 * precision of the subsequent calibration run in at least some 621 * virtualization cases. 622 */ 623 lapic_timer_set_divisor(0); 624 for (i = 0; i < 10; i++) 625 (void)do_cputimer_calibration(100); 626 } 627 /* Try to calibrate the local APIC timer. 
*/ 628 for (lapic_timer_divisor_idx = 0; 629 lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS; 630 lapic_timer_divisor_idx++) { 631 lapic_timer_set_divisor(lapic_timer_divisor_idx); 632 if (use_tsc_calibration) { 633 value = do_tsc_calibration(200*1000, apic_delay_tsc); 634 } else { 635 value = do_cputimer_calibration(2*1000*1000); 636 } 637 if (value != 0) 638 break; 639 } 640 if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS) 641 panic("lapic: no proper timer divisor?!"); 642 lapic_cputimer_intr.freq = value; 643 644 kprintf("lapic: divisor index %d, frequency %u Hz\n", 645 lapic_timer_divisor_idx, lapic_cputimer_intr.freq); 646 647 if (lapic_calibrate_test > 0) { 648 uint64_t freq; 649 int i; 650 651 for (i = 1; i <= 20; i++) { 652 if (use_tsc_calibration) { 653 freq = do_tsc_calibration(i*100*1000, 654 apic_delay_tsc); 655 } else { 656 freq = do_cputimer_calibration(i*100*1000); 657 } 658 if (freq != 0) 659 kprintf("%ums: %lu\n", i * 100, freq); 660 } 661 } 662 } 663 664 static void 665 lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload) 666 { 667 struct globaldata *gd = mycpu; 668 uint64_t diff, now, val; 669 670 if (reload > 1000*1000*1000) 671 reload = 1000*1000*1000; 672 diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq; 673 if (diff < 4) 674 diff = 4; 675 if (cpu_vendor_id == CPU_VENDOR_INTEL) 676 cpu_lfence(); 677 else 678 cpu_mfence(); 679 now = rdtsc(); 680 val = now + diff; 681 if (gd->gd_timer_running) { 682 uint64_t deadline = tsc_deadlines[mycpuid].timestamp; 683 if (deadline == 0 || now > deadline || val < deadline) { 684 wrmsr(MSR_TSC_DEADLINE, val); 685 tsc_deadlines[mycpuid].timestamp = val; 686 } 687 } else { 688 gd->gd_timer_running = 1; 689 wrmsr(MSR_TSC_DEADLINE, val); 690 tsc_deadlines[mycpuid].timestamp = val; 691 } 692 } 693 694 static void 695 lapic_mem_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload) 696 { 697 struct globaldata *gd = mycpu; 698 699 reload = (int64_t)reload * cti->freq / 
	    sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		/* Only shorten a countdown already in flight, never lengthen. */
		if (reload < LAPIC_MEM_READ(ccr_timer))
			LAPIC_MEM_WRITE(icr_timer, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MEM_WRITE(icr_timer, reload);
	}
}

/*
 * x2APIC (MSR-based) flavor of the lapic timer reload, same policy as
 * the memory-mapped version above.
 */
static void
lapic_msr_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	/* Convert sysclock ticks to lapic timer ticks. */
	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER))
			LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	}
}

/*
 * Unmask the lapic timer LVT entry and select one-shot vs TSC-deadline
 * mode, then run the AMD C1E fixup once.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = LAPIC_READ(lvt_timer);
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	LAPIC_WRITE(lvt_timer, timer);
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Detect and disable the AMD C1E feature, which stops the local APIC
 * timer in C1E state.  If 'arg' is non-NULL it is an int* which is set
 * to 1 when this handler also kick-started the timer.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		int c1e_test = lapic_timer_c1e_test;

		if (c1e_test < 0) {
			if (vmm_guest == VMM_GUEST_NONE) {
				c1e_test = 1;
			} else {
				/*
				 * Don't do this C1E testing and adjustment
				 * on virtual machines, the best case for
				 * accessing this MSR is a NOOP; the worst
				 * cases could be pretty nasty, e.g. crash.
				 */
				c1e_test = 0;
			}
		}

		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family.  This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000 &&
		    c1e_test) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu IPI handler: apply the C1E fixup and, if the fixup did not
 * already restart the timer, arm a short one-shot to get ticking again.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	/* Apply the AMD C1E fixup on every active cpu. */
	lwkt_send_ipiq_mask(smp_active_mask,
			    lapic_timer_fixup_handler, NULL);
}

static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	/* Restart the lapic timer on every active cpu. */
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		LAPIC_READ(lvt_lint0), LAPIC_READ(lvt_lint1), LAPIC_READ(tpr),
		LAPIC_READ(svr));
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Wait for any previously-issued IPI to be dispatched by the local
 * APIC, i.e. for the ICR delivery-status (pending) bit to clear.
 * Complains roughly once per second of TSC time while stalled and
 * panics after ~30 complaints.
 */
static __inline void
lapic_mem_icr_unpend(const char *func)
{
	if (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
		int64_t tsc;
		int loops = 1;

		tsc = rdtsc();
		while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
			cpu_pause();
			/* ~1 second of TSC ticks elapsed since last report? */
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				tsc = rdtsc();
				if (++loops > 30) {
					panic("%s: cpu%d apic stalled",
					      func, mycpuid);
				} else {
					kprintf("%s: cpu%d apic stalled\n",
						func, mycpuid);
				}
			}
		}
	}
}

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
static int
lapic_mem_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

static int
lapic_msr_ipi(int dest_type, int vector, int delivery_mode)
{
	/* x2APIC ICR writes have no pending-status bit to wait on. */
	lapic_msr_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * Interrupts must be hard-disabled by caller
 */
static void
lapic_mem_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

static void
lapic_msr_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_msr_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
962 * 963 * Interrupts must be hard-disabled by caller 964 */ 965 void 966 selected_apic_ipi(cpumask_t target, int vector, int delivery_mode) 967 { 968 while (CPUMASK_TESTNZERO(target)) { 969 int n = BSFCPUMASK(target); 970 CPUMASK_NANDBIT(target, n); 971 single_apic_ipi(n, vector, delivery_mode); 972 } 973 } 974 975 /* 976 * Load a 'downcount time' in uSeconds. 977 */ 978 void 979 set_apic_timer(int us) 980 { 981 u_int count; 982 983 if (lapic_use_tscdeadline) { 984 uint64_t val; 985 986 val = lapic_scale_to_tsc(us, 1000000); 987 val += rdtsc(); 988 /* No need to arm the lapic here, just track the timeout. */ 989 tsc_deadlines[mycpuid].downcount_time = val; 990 return; 991 } 992 993 /* 994 * When we reach here, lapic timer's frequency 995 * must have been calculated as well as the 996 * divisor (lapic->dcr_timer is setup during the 997 * divisor calculation). 998 */ 999 KKASSERT(lapic_cputimer_intr.freq != 0 && 1000 lapic_timer_divisor_idx >= 0); 1001 1002 count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000; 1003 lapic_timer_oneshot(count); 1004 } 1005 1006 1007 /* 1008 * Read remaining time in timer, in microseconds (rounded up). 1009 */ 1010 int 1011 read_apic_timer(void) 1012 { 1013 uint64_t val; 1014 1015 if (lapic_use_tscdeadline) { 1016 uint64_t now; 1017 1018 val = tsc_deadlines[mycpuid].downcount_time; 1019 now = rdtsc(); 1020 if (val == 0 || now > val) { 1021 return 0; 1022 } else { 1023 val -= now; 1024 val *= 1000000; 1025 val += (tsc_frequency - 1); 1026 val /= tsc_frequency; 1027 if (val > INT_MAX) 1028 val = INT_MAX; 1029 return val; 1030 } 1031 } 1032 1033 val = LAPIC_READ(ccr_timer); 1034 if (val == 0) 1035 return 0; 1036 1037 KKASSERT(lapic_cputimer_intr.freq > 0); 1038 val *= 1000000; 1039 val += (lapic_cputimer_intr.freq - 1); 1040 val /= lapic_cputimer_intr.freq; 1041 if (val > INT_MAX) 1042 val = INT_MAX; 1043 return val; 1044 } 1045 1046 1047 /* 1048 * Spin-style delay, set delay time in uS, spin till it drains. 
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

/*
 * Find an unused apic id, searching upward from 'start'.
 * Returns NAPICID when every id below APICID_MAX is taken.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/*
 * Map the LAPIC register page into kernel virtual space, uncacheable
 * as required for memory-mapped APIC access.
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic_mem = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

/*
 * Switch this cpu into x2APIC mode; on the BSP also repoint the global
 * LAPIC access function vectors at the MSR-based implementations.
 */
void
lapic_x2apic_enter(boolean_t bsp)
{
	uint64_t apic_base;

	KASSERT(x2apic_enable, ("X2APIC mode is not enabled"));

	/*
	 * X2APIC mode is requested, if it has not been enabled by the BIOS,
	 * enable it now.
	 */
	apic_base = rdmsr(MSR_APICBASE);
	if ((apic_base & APICBASE_X2APIC) == 0) {
		wrmsr(MSR_APICBASE,
		    apic_base | APICBASE_X2APIC | APICBASE_ENABLED);
	}
	if (bsp) {
		lapic_eoi = lapic_msr_eoi;
		apic_ipi = lapic_msr_ipi;
		single_apic_ipi = lapic_msr_single_ipi;
		lapic_cputimer_intr.reload = lapic_msr_timer_intr_reload;
	}
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe and enumerate the local APICs, deciding between xAPIC and
 * x2APIC access along the way.  Returns 0 on success, ENXIO if no
 * enumerator finds a LAPIC or enumeration fails.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	uint64_t apic_base;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	/* Default to the memory-mapped (xAPIC) access functions. */
	lapic_eoi = lapic_mem_eoi;
	apic_ipi = lapic_mem_ipi;
	single_apic_ipi = lapic_mem_single_ipi;

	TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable);
	if (x2apic_enable < 0)
		x2apic_enable = 1;

	if ((cpu_feature2 & CPUID2_X2APIC) == 0) {
		/* X2APIC is not supported. */
		x2apic_enable = 0;
	} else if (!x2apic_enable) {
		/*
		 * If the BIOS enabled the X2APIC mode, then we would stick
		 * with the X2APIC mode.
		 */
		apic_base = rdmsr(MSR_APICBASE);
		if (apic_base & APICBASE_X2APIC) {
			kprintf("LAPIC: BIOS enabled X2APIC mode\n");
			x2apic_enable = 1;
		}
	}

	if (x2apic_enable) {
		/*
		 * Enter X2APIC mode.
		 */
		kprintf("LAPIC: enter X2APIC mode\n");
		lapic_x2apic_enter(TRUE);
	}

	/* Invalidate the whole apic id -> cpu id map. */
	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	/* Use the first registered enumerator whose probe succeeds. */
	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	/* LAPIC is usable now. */
	lapic_usable = 1;

	/* Clamp the number of APs to the hw.ap_max tunable. */
	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Register a LAPIC enumerator, keeping the list sorted by descending
 * lapic_prio so lapic_config() tries higher-priority enumerators first.
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/* Record a bidirectional cpu id <-> apic id mapping. */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * With no I/O APIC available, unmask LINT0 (ExtINT from the 8259)
 * and mask LINT1 on the BSP.
 */
void
lapic_fixup_noioapic(void)
{
	u_int temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = LAPIC_READ(lvt_lint0);
	temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint0, temp);

	temp = LAPIC_READ(lvt_lint1);
	temp |= APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);
}

static void
lapic_mem_eoi(void)
{
	log_lapic(mem_eoi);
	LAPIC_MEM_WRITE(eoi, 0);
}

static void
lapic_msr_eoi(void)
{
	log_lapic(msr_eoi);
	LAPIC_MSR_WRITE(MSR_X2APIC_EOI, 0);
}

/* Issue an IPI via the memory-mapped ICR and spin until delivered. */
static void
lapic_mem_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	lapic_mem_icr_set(apic_id, icr_lo_val);
	while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND)
		/* spin */;
}

/*
 * Issue an IPI synchronously.  The x2APIC MSR path has no
 * delivery-status bit, so its write needs no wait.
 */
void
lapic_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	if (x2apic_enable)
		lapic_msr_icr_set(apic_id, icr_lo_val);
	else
		lapic_mem_seticr_sync(apic_id, icr_lo_val);
}

/*
 * Boot-time LAPIC bring-up; falls back to the ICU (8259) path when
 * LAPIC configuration fails.
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}
	if (!lapic_enable)
		x2apic_enable = 0;

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		/* Without a usable LAPIC the I/O APIC cannot be used. */
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);