/*
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/machintr.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>

#if !defined(KTR_LAPIC)
#define KTR_LAPIC	KTR_ALL
#endif
KTR_INFO_MASTER(lapic);
KTR_INFO(KTR_LAPIC, lapic, mem_eoi, 0, "mem_eoi");
KTR_INFO(KTR_LAPIC, lapic, msr_eoi, 0, "msr_eoi");
#define log_lapic(name)	KTR_LOG(lapic_ ## name)

extern int naps;

volatile lapic_t *lapic_mem;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);

static int	lapic_timer_c1e_test = -1;	/* auto-detect */
TUNABLE_INT("hw.lapic_timer_c1e_test", &lapic_timer_c1e_test);

static int	lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

static int	lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

static int	lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

static int	lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_mem_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_msr_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_mem_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

static int		lapic_timer_divisor_idx = -1;
static const uint32_t	lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS	(int)(NELEM(lapic_timer_divisors))

static int	lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int	lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;
int	lapic_usable = 0;
int	x2apic_enable = 1;

SYSCTL_INT(_hw, OID_AUTO, x2apic_enable, CTLFLAG_RD, &x2apic_enable, 0, "");

/* Separate cachelines for each cpu's info. */
struct deadlines {
	uint64_t timestamp;
	uint64_t downcount_time;
	uint64_t padding[6];
};
struct deadlines *tsc_deadlines = NULL;

static void	lapic_mem_eoi(void);
static int	lapic_mem_ipi(int dest_type, int vector, int delivery_mode);
static void	lapic_mem_single_ipi(int cpu, int vector, int delivery_mode);

static void	lapic_msr_eoi(void);
static int	lapic_msr_ipi(int dest_type, int vector, int delivery_mode);
static void	lapic_msr_single_ipi(int cpu, int vector, int delivery_mode);

void		(*lapic_eoi)(void);
int		(*apic_ipi)(int dest_type, int vector, int delivery_mode);
void		(*single_apic_ipi)(int cpu, int vector, int delivery_mode);

static __inline void
lapic_mem_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	uint32_t icr_lo, icr_hi;

	icr_hi = (LAPIC_MEM_READ(icr_hi) & ~APIC_ID_MASK) |
	    (apic_id << APIC_ID_SHIFT);
	icr_lo = (LAPIC_MEM_READ(icr_lo) & APIC_ICRLO_RESV_MASK) | icr_lo_val;

	LAPIC_MEM_WRITE(icr_hi, icr_hi);
	LAPIC_MEM_WRITE(icr_lo, icr_lo);
}

static __inline void
lapic_msr_icr_set(uint32_t apic_id, uint32_t icr_lo_val)
{
	LAPIC_MSR_WRITE(MSR_X2APIC_ICR,
	    ((uint64_t)apic_id << 32) | ((uint64_t)icr_lo_val));
}
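
/*
 * Note on the two ICR helpers above: in xAPIC mode the destination
 * APIC ID lives in bits 24-31 of ICR_HI and the IPI is actually
 * triggered by the subsequent write to ICR_LO, so ICR_HI must be
 * written first.  In x2APIC mode both halves are carried by a single
 * 64-bit MSR write, with the destination in bits 32-63.  As an
 * illustrative sketch (vector 0xf0 chosen arbitrarily), sending a
 * fixed IPI to APIC ID 3 via the MSR path boils down to:
 *
 *	wrmsr(MSR_X2APIC_ICR,
 *	    ((uint64_t)3 << 32) |
 *	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT |
 *	    APIC_DELMODE_FIXED | 0xf0);
 *
 * which also avoids the read-modify-write of the reserved ICR bits
 * that the xAPIC path has to perform.
 */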

/*
 * Enable LAPIC, configure interrupts.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			tsc_deadlines =
			    kmalloc(sizeof(struct deadlines) * (naps + 1),
				    M_DEVBUF,
				    M_WAITOK | M_ZERO | M_CACHEALIGN);
		}
	}

	/*
	 * Install vectors.
	 *
	 * Since the IDT is shared between the BSP and the APs, these
	 * vectors only need to be installed once; we do it on the BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |		/* enable */
			    (0 << 16) |		/* bus */
			    (0x18 << 11) |	/* dev (cpu + 0x18) */
			    (0 << 8) |		/* func */
			    0x68		/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr | 0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for sniffing CPU state */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Set up LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be set up edge triggered, active high.
	 *
	 * Disable LINT0 on the BSP, if the I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = LAPIC_READ(lvt_lint0);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	LAPIC_WRITE(lvt_lint0, temp);

	/*
	 * Set up LINT1 as NMI.
	 *
	 * Must be set up edge triggered, active high.
	 *
	 * Enable LINT1 on the BSP, if the I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = LAPIC_READ(lvt_lint1);
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);

	/*
	 * Mask the LAPIC error interrupt and the LAPIC performance
	 * counter interrupt.
	 */
	LAPIC_WRITE(lvt_error, LAPIC_READ(lvt_error) | APIC_LVT_MASKED);
	LAPIC_WRITE(lvt_pcint, LAPIC_READ(lvt_pcint) | APIC_LVT_MASKED);

	/*
	 * Set the LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = LAPIC_READ(lvt_timer);
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	LAPIC_WRITE(lvt_timer, timer);

	/*
	 * Set the Task Priority Register as needed.  At the moment allow
	 * interrupts on all cpus (the APs will remain cli'd until they
	 * are ready to deal with them).
	 */
	temp = LAPIC_READ(tpr);
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	LAPIC_WRITE(tpr, temp);

	/*
	 * AMD specific setup.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD && lapic_mem != NULL &&
	    (LAPIC_MEM_READ(version) & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		ext_feat = LAPIC_MEM_READ(ext_feat);
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic_mem->ext_lvt) /
		    sizeof(lapic_mem->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = LAPIC_MEM_READ(ext_lvt[i].lvt);

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			switch (i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
				    i, LAPIC_MEM_READ(ext_lvt[i].lvt));
				if (LAPIC_MEM_READ(ext_lvt[i].lvt) != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			LAPIC_MEM_WRITE(ext_lvt[i].lvt, lvt);
		}
	}

	/*
	 * Enable the LAPIC.
	 */
	temp = LAPIC_READ(svr);
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (LAPIC_READ(version) & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the
	 * vector must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	LAPIC_WRITE(svr, temp);

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);
	LAPIC_WRITE(eoi, 0);

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * The local APIC timer will not stop
				 * in deep C-states.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}
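
/*
 * The divisor helpers below index lapic_timer_divisors[], which maps
 * indices 0..7 to the DCR encodings for divide-by-2, 4, 8, 16, 32, 64,
 * 128 and 1, in that order.  For example (bus frequency illustrative):
 * lapic_timer_set_divisor(3) selects APIC_TDCR_16, so with a 100 MHz
 * bus clock the timer would decrement at 100 MHz / 16 = 6.25 MHz.
 */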

static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	LAPIC_WRITE(dcr_timer, lapic_timer_divisors[divisor_idx]);
}

static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = LAPIC_READ(lvt_timer);
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	LAPIC_WRITE(lvt_timer, value);
	LAPIC_WRITE(icr_timer, count);
}

static void
lapic_timer_oneshot_quick(u_int count)
{
	LAPIC_WRITE(icr_timer, count);
}

static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val += (scale - 1);
	val /= scale;
	return val;
}
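
/*
 * lapic_scale_to_tsc() computes ceil(value * tsc_frequency / scale),
 * i.e. it converts 'value' units of 1/scale seconds into TSC ticks,
 * rounding up.  A worked example, assuming an illustrative 2.4 GHz
 * invariant TSC: lapic_scale_to_tsc(250, 1000000) converts 250us into
 * (250 * 2400000000 + 999999) / 1000000 = 600000 TSC ticks.
 */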

#define MAX_MEASURE_RETRIES	100

static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = LAPIC_READ(ccr_timer);
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = LAPIC_READ(ccr_timer);
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}

static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = LAPIC_READ(ccr_timer);
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = LAPIC_READ(ccr_timer);
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}
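
/*
 * A note on the calibration math above.  Both routines compute the
 * LAPIC timer frequency as (elapsed LAPIC ticks) * (reference ticks
 * per second) / (elapsed reference ticks).  do_tsc_calibration()
 * brackets each LAPIC counter read with two TSC reads; dividing
 * 2 * count * tsc_frequency by the summed deltas effectively averages
 * the two bracketing timestamps.  Illustrative numbers, assuming a
 * 2.4 GHz TSC: if count = 1000000 LAPIC ticks elapse while the
 * averaged TSC delta is 480000000 ticks (0.2s), the result is
 * 1000000 * 2400000000 / 480000000 = 5000000, i.e. a 5 MHz timer.
 */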

static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate the lapic timer if we will use TSC Deadline mode. */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		sysclock_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = LAPIC_READ(ccr_timer);

		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = LAPIC_READ(ccr_timer);
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): avg %lu min %lu max %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access.  This
		 * improves precision of the subsequent calibration run
		 * in at least some virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
	    lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}
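
/*
 * The divisor scan above starts at the smallest divide ratio and moves
 * on whenever a calibration run returns 0: either the 32-bit counter
 * wrapped (ccr_timer read back as 0) or do_tsc_calibration() decided
 * the timer could not count for a full 2 seconds at that rate.
 * Illustrative numbers: if the timer decremented at 2.4 GHz, the
 * counter would wrap after 2^32 / 2400000000 = ~1.79s < 2s, so that
 * divisor would be rejected and the scan would try the next (larger)
 * divide ratio.
 */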

static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

static void
lapic_mem_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MEM_READ(ccr_timer))
			LAPIC_MEM_WRITE(icr_timer, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MEM_WRITE(icr_timer, reload);
	}
}

static void
lapic_msr_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER))
			LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	} else {
		gd->gd_timer_running = 1;
		LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload);
	}
}

static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = LAPIC_READ(lvt_timer);
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	LAPIC_WRITE(lvt_timer, timer);
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}
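
/*
 * All three reload paths above convert 'reload', expressed in
 * sys_cputimer ticks, into the interrupt timer's own tick domain via
 * reload * cti->freq / sys_cputimer->freq.  Illustrative numbers: a
 * 10ms reload of 35795 ticks of a 3579545 Hz sys_cputimer, driving a
 * LAPIC timer calibrated at 25 MHz, becomes roughly
 * 35795 * 25000000 / 3579545 = ~250000 LAPIC ticks.  The minimum
 * counts (2 LAPIC ticks, 4 TSC ticks) keep the programmed value from
 * ever being 0, since a zero initial count (or a zero TSC deadline)
 * disarms the timer instead of firing it immediately.
 */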

static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		int c1e_test = lapic_timer_c1e_test;

		if (c1e_test < 0) {
			if (vmm_guest == VMM_GUEST_NONE) {
				c1e_test = 1;
			} else {
				/*
				 * Don't do this C1E testing and adjustment
				 * on virtual machines; the best case for
				 * accessing this MSR is a NOOP, while the
				 * worst cases could be pretty nasty, e.g.
				 * a crash.
				 */
				c1e_test = 0;
			}
		}

		/*
		 * Detect the presence of C1E capability mostly on the
		 * latest dual-core (or future) k8 family.  This feature
		 * renders the local APIC timer dead, so we disable it by
		 * reading the Interrupt Pending Message register and
		 * clearing both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt
		 * (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000 &&
		    c1e_test) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
				    gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in virtual machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in virtual machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * Currently this function is called only by ACPICA code, for the AMD
 * C1E fixup.  AMD C1E only seems to kick in after the ACPI module takes
 * control of power management, so once ACPICA is attached we try to
 * apply the fixup to prevent the LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
	    lapic_timer_fixup_handler, NULL);
}

static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * Dump the contents of the local APIC registers.
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    LAPIC_READ(lvt_lint0), LAPIC_READ(lvt_lint1), LAPIC_READ(tpr),
	    LAPIC_READ(svr));
}

/*
 * Inter Processor Interrupt functions.
 */

static __inline void
lapic_mem_icr_unpend(const char *func)
{
	if (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
		int64_t tsc;
		int loops = 1;

		tsc = rdtsc();
		while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				tsc = rdtsc();
				if (++loops > 30) {
					panic("%s: cpu%d apic stalled",
					    func, mycpuid);
				} else {
					kprintf("%s: cpu%d apic stalled\n",
					    func, mycpuid);
				}
			}
		}
	}
}

/*
 * Send an APIC IPI 'vector' to 'dest_type' via 'delivery_mode'.
 *
 * dest_type is one of: APIC_DEST_SELF, APIC_DEST_ALLISELF,
 * APIC_DEST_ALLESELF.
 * vector is any valid SYSTEM INT vector.
 * delivery_mode is one of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO.
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to go busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
static int
lapic_mem_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

static int
lapic_msr_ipi(int dest_type, int vector, int delivery_mode)
{
	lapic_msr_icr_set(0,
	    dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector);
	return 0;
}

/*
 * Interrupts must be hard-disabled by the caller.
 */
static void
lapic_mem_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_mem_icr_unpend(__func__);
	lapic_mem_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

static void
lapic_msr_single_ipi(int cpu, int vector, int delivery_mode)
{
	lapic_msr_icr_set(CPUID_TO_APICID(cpu),
	    APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector);
}

/*
 * Send an APIC IPI 'vector' to the set of cpus in 'target' via
 * 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any valid system
 * INT vector.  Delivery mode may be either APIC_DELMODE_FIXED or
 * APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by the caller.
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in microseconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, the lapic timer's frequency must have been
	 * calculated as well as the divisor (lapic->dcr_timer is set up
	 * during the divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
	    lapic_timer_divisor_idx >= 0);

	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}
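
/*
 * Worked example for set_apic_timer(): the count computed above is
 * ceil(us * freq / 1000000).  With an illustrative calibrated timer
 * frequency of 10 MHz, set_apic_timer(100) arms a one-shot of
 * (100 * 10000000 + 999999) / 1000000 = 1000 LAPIC ticks, i.e. 100us.
 * In TSC deadline mode nothing is armed; read_apic_timer() below just
 * compares the recorded downcount_time against the current TSC.
 */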

/*
 * Read the remaining time in the timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = LAPIC_READ(ccr_timer);
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay: set the delay time in microseconds, then spin
 * until it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic_mem = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

void
lapic_x2apic_enter(boolean_t bsp)
{
	uint64_t apic_base;

	KASSERT(x2apic_enable, ("X2APIC mode is not enabled"));

	/*
	 * X2APIC mode was requested; if it has not been enabled by the
	 * BIOS, enable it now.
	 */
	apic_base = rdmsr(MSR_APICBASE);
	if ((apic_base & APICBASE_X2APIC) == 0) {
		wrmsr(MSR_APICBASE,
		    apic_base | APICBASE_X2APIC | APICBASE_ENABLED);
	}
	if (bsp) {
		lapic_eoi = lapic_msr_eoi;
		apic_ipi = lapic_msr_ipi;
		single_apic_ipi = lapic_msr_single_ipi;
		lapic_cputimer_intr.reload = lapic_msr_timer_intr_reload;
	}
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
    TAILQ_HEAD_INITIALIZER(lapic_enumerators);

int
lapic_config(void)
{
	struct lapic_enumerator *e;
	uint64_t apic_base;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	lapic_eoi = lapic_mem_eoi;
	apic_ipi = lapic_mem_ipi;
	single_apic_ipi = lapic_mem_single_ipi;

	TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable);
	if (x2apic_enable < 0)
		x2apic_enable = 1;

	if ((cpu_feature2 & CPUID2_X2APIC) == 0) {
		/* X2APIC is not supported. */
		x2apic_enable = 0;
	} else if (!x2apic_enable) {
		/*
		 * If the BIOS has already enabled X2APIC mode, we stick
		 * with it.
		 */
		apic_base = rdmsr(MSR_APICBASE);
		if (apic_base & APICBASE_X2APIC) {
			kprintf("LAPIC: BIOS enabled X2APIC mode\n");
			x2apic_enable = 1;
		}
	}

	if (x2apic_enable) {
		/*
		 * Enter X2APIC mode.
		 */
		kprintf("LAPIC: enter X2APIC mode\n");
		lapic_x2apic_enter(TRUE);
	}

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	/* The LAPIC is usable now. */
	lapic_usable = 1;

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning: only using %d of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}
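
/*
 * lapic_enumerator_register() below keeps the enumerator list sorted
 * by descending lapic_prio, so lapic_config() above probes the
 * higher-priority enumerators first and uses the first one whose probe
 * succeeds (e.g. an ACPI MADT based enumerator would typically register
 * with a higher priority than an MP-table based one; example names
 * illustrative).
 */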

void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

void
lapic_fixup_noioapic(void)
{
	u_int temp;

	/* Only allowed on the BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = LAPIC_READ(lvt_lint0);
	temp &= ~APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint0, temp);

	temp = LAPIC_READ(lvt_lint1);
	temp |= APIC_LVT_MASKED;
	LAPIC_WRITE(lvt_lint1, temp);
}

static void
lapic_mem_eoi(void)
{
	log_lapic(mem_eoi);
	LAPIC_MEM_WRITE(eoi, 0);
}

static void
lapic_msr_eoi(void)
{
	log_lapic(msr_eoi);
	LAPIC_MSR_WRITE(MSR_X2APIC_EOI, 0);
}

static void
lapic_mem_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	lapic_mem_icr_set(apic_id, icr_lo_val);
	while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND)
		/* spin */;
}

void
lapic_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val)
{
	if (x2apic_enable)
		lapic_msr_icr_set(apic_id, icr_lo_val);
	else
		lapic_mem_seticr_sync(apic_id, icr_lo_val);
}

static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}
	if (!lapic_enable)
		x2apic_enable = 0;

	if (lapic_enable) {
		/* Initialize the BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);