1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $ 26 */ 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/kernel.h> 31 #include <sys/ktr.h> 32 #include <sys/bus.h> 33 #include <sys/machintr.h> 34 #include <sys/sysctl.h> 35 #include <machine/globaldata.h> 36 #include <machine/clock.h> 37 #include <machine/limits.h> 38 #include <machine/smp.h> 39 #include <machine/md_var.h> 40 #include <machine/pmap.h> 41 #include <machine/specialreg.h> 42 #include <machine_base/apic/lapic.h> 43 #include <machine_base/apic/ioapic.h> 44 #include <machine_base/apic/ioapic_abi.h> 45 #include <machine_base/apic/apicvar.h> 46 #include <machine_base/icu/icu_var.h> 47 #include <machine/segments.h> 48 #include <sys/thread2.h> 49 #include <sys/spinlock2.h> 50 51 #include <machine/cputypes.h> 52 #include <machine/intr_machdep.h> 53 54 #if !defined(KTR_LAPIC) 55 #define KTR_LAPIC KTR_ALL 56 #endif 57 KTR_INFO_MASTER(lapic); 58 KTR_INFO(KTR_LAPIC, lapic, mem_eoi, 0, "mem_eoi"); 59 KTR_INFO(KTR_LAPIC, lapic, msr_eoi, 0, "msr_eoi"); 60 #define log_lapic(name) KTR_LOG(lapic_ ## name) 61 62 extern int naps; 63 64 volatile lapic_t *lapic_mem; 65 66 static void lapic_timer_calibrate(void); 67 static void lapic_timer_set_divisor(int); 68 static void lapic_timer_fixup_handler(void *); 69 static void lapic_timer_restart_handler(void *); 70 71 72 static int lapic_timer_enable = 1; 73 TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable); 74 75 static int lapic_timer_tscdeadline = 1; 76 TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline); 77 78 static int lapic_calibrate_test = 0; 79 TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test); 80 81 static int lapic_calibrate_fast = 1; 82 TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast); 83 84 static void lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t); 85 static void lapic_mem_timer_intr_reload(struct cputimer_intr *, sysclock_t); 86 static void lapic_msr_timer_intr_reload(struct cputimer_intr *, sysclock_t); 87 static void lapic_timer_intr_enable(struct cputimer_intr *); 88 static void lapic_timer_intr_restart(struct cputimer_intr *); 89 static void lapic_timer_intr_pmfixup(struct cputimer_intr *); 90 91 static struct cputimer_intr lapic_cputimer_intr = { 92 .freq = 0, 93 .reload = lapic_mem_timer_intr_reload, 94 .enable = lapic_timer_intr_enable, 95 .config = cputimer_intr_default_config, 96 .restart = lapic_timer_intr_restart, 97 .pmfixup = lapic_timer_intr_pmfixup, 98 .initclock = cputimer_intr_default_initclock, 99 .pcpuhand = NULL, 100 .next = SLIST_ENTRY_INITIALIZER, 101 .name = "lapic", 102 .type = CPUTIMER_INTR_LAPIC, 103 .prio = CPUTIMER_INTR_PRIO_LAPIC, 104 .caps = CPUTIMER_INTR_CAP_NONE, 105 .priv = NULL 106 }; 107 108 static int lapic_timer_divisor_idx = -1; 109 static const uint32_t lapic_timer_divisors[] = { 110 APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 111 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128, APIC_TDCR_1 112 }; 113 #define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors)) 114 115 static int lapic_use_tscdeadline = 0; 116 /* The raw TSC frequency might not fit into a sysclock_t value. */ 117 static int lapic_timer_tscfreq_shift; 118 119 /* 120 * APIC ID <-> CPU ID mapping structures. 121 */ 122 int cpu_id_to_apic_id[NAPICID]; 123 int apic_id_to_cpu_id[NAPICID]; 124 int lapic_enable = 1; 125 int lapic_usable = 0; 126 int x2apic_enable = 0; 127 128 SYSCTL_INT(_hw, OID_AUTO, x2apic_enable, CTLFLAG_RD, &x2apic_enable, 0, ""); 129 130 /* Separate cachelines for each cpu's info. */ 131 struct deadlines { 132 uint64_t timestamp; 133 uint64_t downcount_time; 134 uint64_t padding[6]; 135 }; 136 struct deadlines *tsc_deadlines = NULL; 137 138 static void lapic_mem_eoi(void); 139 static int lapic_mem_ipi(int dest_type, int vector, int delivery_mode); 140 static void lapic_mem_single_ipi(int cpu, int vector, int delivery_mode); 141 142 static void lapic_msr_eoi(void); 143 static int lapic_msr_ipi(int dest_type, int vector, int delivery_mode); 144 static void lapic_msr_single_ipi(int cpu, int vector, int delivery_mode); 145 146 void (*lapic_eoi)(void); 147 int (*apic_ipi)(int dest_type, int vector, int delivery_mode); 148 void (*single_apic_ipi)(int cpu, int vector, int delivery_mode); 149 150 static __inline void 151 lapic_mem_icr_set(uint32_t apic_id, uint32_t icr_lo_val) 152 { 153 uint32_t icr_lo, icr_hi; 154 155 icr_hi = (LAPIC_MEM_READ(icr_hi) & ~APIC_ID_MASK) | 156 (apic_id << APIC_ID_SHIFT); 157 icr_lo = (LAPIC_MEM_READ(icr_lo) & APIC_ICRLO_RESV_MASK) | icr_lo_val; 158 159 LAPIC_MEM_WRITE(icr_hi, icr_hi); 160 LAPIC_MEM_WRITE(icr_lo, icr_lo); 161 } 162 163 static __inline void 164 lapic_msr_icr_set(uint32_t apic_id, uint32_t icr_lo_val) 165 { 166 LAPIC_MSR_WRITE(MSR_X2APIC_ICR, 167 ((uint64_t)apic_id << 32) | ((uint64_t)icr_lo_val)); 168 } 169 170 /* 171 * Enable LAPIC, configure interrupts. 172 */ 173 void 174 lapic_init(boolean_t bsp) 175 { 176 uint32_t timer; 177 u_int temp; 178 179 if (bsp) { 180 /* Decide whether we want to use TSC Deadline mode. */ 181 if (lapic_timer_tscdeadline != 0 && 182 (cpu_feature2 & CPUID2_TSCDLT) && 183 tsc_invariant && tsc_frequency != 0) { 184 lapic_use_tscdeadline = 1; 185 tsc_deadlines = kmalloc_cachealign( 186 sizeof(struct deadlines) * (naps + 1), 187 M_DEVBUF, M_WAITOK | M_ZERO); 188 } 189 } 190 191 /* 192 * Install vectors 193 * 194 * Since IDT is shared between BSP and APs, these vectors 195 * only need to be installed once; we do it on BSP. 196 */ 197 if (bsp) { 198 if (cpu_vendor_id == CPU_VENDOR_AMD && 199 CPUID_TO_FAMILY(cpu_id) >= 0x0f && 200 CPUID_TO_FAMILY(cpu_id) < 0x17) { /* XXX */ 201 uint32_t tcr; 202 203 /* 204 * Set the LINTEN bit in the HyperTransport 205 * Transaction Control Register. 206 * 207 * This will cause EXTINT and NMI interrupts 208 * routed over the hypertransport bus to be 209 * fed into the LAPIC LINT0/LINT1. If the bit 210 * isn't set, the interrupts will go to the 211 * general cpu INTR/NMI pins. On a dual-core 212 * cpu the interrupt winds up going to BOTH cpus. 213 * The first cpu that does the interrupt ack 214 * cycle will get the correct interrupt. The 215 * second cpu that does it will get a spurious 216 * interrupt vector (typically IRQ 7). 217 */ 218 outl(0x0cf8, 219 (1 << 31) | /* enable */ 220 (0 << 16) | /* bus */ 221 (0x18 << 11) | /* dev (cpu + 0x18) */ 222 (0 << 8) | /* func */ 223 0x68 /* reg */ 224 ); 225 tcr = inl(0xcfc); 226 if ((tcr & 0x00010000) == 0) { 227 kprintf("LAPIC: AMD LINTEN on\n"); 228 outl(0xcfc, tcr|0x00010000); 229 } 230 outl(0x0cf8, 0); 231 } 232 233 /* Install a 'Spurious INTerrupt' vector */ 234 setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint, 235 SDT_SYSIGT, SEL_KPL, 0); 236 237 /* Install a timer vector */ 238 setidt_global(XTIMER_OFFSET, Xtimer, 239 SDT_SYSIGT, SEL_KPL, 0); 240 241 /* Install an inter-CPU IPI for TLB invalidation */ 242 setidt_global(XINVLTLB_OFFSET, Xinvltlb, 243 SDT_SYSIGT, SEL_KPL, 0); 244 245 /* Install an inter-CPU IPI for IPIQ messaging */ 246 setidt_global(XIPIQ_OFFSET, Xipiq, 247 SDT_SYSIGT, SEL_KPL, 0); 248 249 /* Install an inter-CPU IPI for CPU stop/restart */ 250 setidt_global(XCPUSTOP_OFFSET, Xcpustop, 251 SDT_SYSIGT, SEL_KPL, 0); 252 253 /* Install an inter-CPU IPI for TLB invalidation */ 254 setidt_global(XSNIFF_OFFSET, Xsniff, 255 SDT_SYSIGT, SEL_KPL, 0); 256 } 257 258 /* 259 * Setup LINT0 as ExtINT on the BSP. This is theoretically an 260 * aggregate interrupt input from the 8259. The INTA cycle 261 * will be routed to the external controller (the 8259) which 262 * is expected to supply the vector. 263 * 264 * Must be setup edge triggered, active high. 265 * 266 * Disable LINT0 on BSP, if I/O APIC is enabled. 267 * 268 * Disable LINT0 on the APs. It doesn't matter what delivery 269 * mode we use because we leave it masked. 270 */ 271 temp = LAPIC_READ(lvt_lint0); 272 temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 273 APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); 274 if (bsp) { 275 temp |= APIC_LVT_DM_EXTINT; 276 if (ioapic_enable) 277 temp |= APIC_LVT_MASKED; 278 } else { 279 temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED; 280 } 281 LAPIC_WRITE(lvt_lint0, temp); 282 283 /* 284 * Setup LINT1 as NMI. 285 * 286 * Must be setup edge trigger, active high. 287 * 288 * Enable LINT1 on BSP, if I/O APIC is enabled. 289 * 290 * Disable LINT1 on the APs. 291 */ 292 temp = LAPIC_READ(lvt_lint1); 293 temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK | 294 APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK); 295 temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI; 296 if (bsp && ioapic_enable) 297 temp &= ~APIC_LVT_MASKED; 298 LAPIC_WRITE(lvt_lint1, temp); 299 300 /* 301 * Mask the LAPIC error interrupt, LAPIC performance counter 302 * interrupt. 303 */ 304 LAPIC_WRITE(lvt_error, LAPIC_READ(lvt_error) | APIC_LVT_MASKED); 305 LAPIC_WRITE(lvt_pcint, LAPIC_READ(lvt_pcint) | APIC_LVT_MASKED); 306 307 /* 308 * Set LAPIC timer vector and mask the LAPIC timer interrupt. 309 */ 310 timer = LAPIC_READ(lvt_timer); 311 timer &= ~APIC_LVTT_VECTOR; 312 timer |= XTIMER_OFFSET; 313 timer |= APIC_LVTT_MASKED; 314 LAPIC_WRITE(lvt_timer, timer); 315 316 /* 317 * Set the Task Priority Register as needed. At the moment allow 318 * interrupts on all cpus (the APs will remain CLId until they are 319 * ready to deal). 320 */ 321 temp = LAPIC_READ(tpr); 322 temp &= ~APIC_TPR_PRIO; /* clear priority field */ 323 LAPIC_WRITE(tpr, temp); 324 325 /* 326 * AMD specific setup 327 */ 328 if (cpu_vendor_id == CPU_VENDOR_AMD && lapic_mem != NULL && 329 (LAPIC_MEM_READ(version) & APIC_VER_AMD_EXT_SPACE)) { 330 uint32_t ext_feat; 331 uint32_t count; 332 uint32_t max_count; 333 uint32_t lvt; 334 uint32_t i; 335 336 ext_feat = LAPIC_MEM_READ(ext_feat); 337 count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT; 338 max_count = sizeof(lapic_mem->ext_lvt) / 339 sizeof(lapic_mem->ext_lvt[0]); 340 if (count > max_count) 341 count = max_count; 342 for (i = 0; i < count; ++i) { 343 lvt = LAPIC_MEM_READ(ext_lvt[i].lvt); 344 345 lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK | 346 APIC_LVT_DM_MASK | APIC_LVT_MASKED); 347 lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED; 348 349 switch(i) { 350 case APIC_EXTLVT_IBS: 351 break; 352 case APIC_EXTLVT_MCA: 353 break; 354 case APIC_EXTLVT_DEI: 355 break; 356 case APIC_EXTLVT_SBI: 357 break; 358 default: 359 break; 360 } 361 if (bsp) { 362 kprintf(" LAPIC AMD elvt%d: 0x%08x", 363 i, LAPIC_MEM_READ(ext_lvt[i].lvt)); 364 if (LAPIC_MEM_READ(ext_lvt[i].lvt) != lvt) 365 kprintf(" -> 0x%08x", lvt); 366 kprintf("\n"); 367 } 368 LAPIC_MEM_WRITE(ext_lvt[i].lvt, lvt); 369 } 370 } 371 372 /* 373 * Enable the LAPIC 374 */ 375 temp = LAPIC_READ(svr); 376 temp |= APIC_SVR_ENABLE; /* enable the LAPIC */ 377 temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */ 378 379 if (LAPIC_READ(version) & APIC_VER_EOI_SUPP) { 380 if (temp & APIC_SVR_EOI_SUPP) { 381 temp &= ~APIC_SVR_EOI_SUPP; 382 if (bsp) 383 kprintf(" LAPIC disabling EOI supp\n"); 384 } 385 } 386 387 /* 388 * Set the spurious interrupt vector. The low 4 bits of the vector 389 * must be 1111. 390 */ 391 if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F) 392 panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET); 393 temp &= ~APIC_SVR_VECTOR; 394 temp |= XSPURIOUSINT_OFFSET; 395 396 LAPIC_WRITE(svr, temp); 397 398 /* 399 * Pump out a few EOIs to clean out interrupts that got through 400 * before we were able to set the TPR. 401 */ 402 LAPIC_WRITE(eoi, 0); 403 LAPIC_WRITE(eoi, 0); 404 LAPIC_WRITE(eoi, 0); 405 406 if (bsp) { 407 lapic_timer_calibrate(); 408 if (lapic_timer_enable) { 409 if (cpu_thermal_feature & CPUID_THERMAL_ARAT) { 410 /* 411 * Local APIC timer will not stop 412 * in deep C-state. 413 */ 414 lapic_cputimer_intr.caps |= 415 CPUTIMER_INTR_CAP_PS; 416 } 417 if (lapic_use_tscdeadline) { 418 lapic_cputimer_intr.reload = 419 lapic_timer_tscdlt_reload; 420 } 421 cputimer_intr_register(&lapic_cputimer_intr); 422 cputimer_intr_select(&lapic_cputimer_intr, 0); 423 } 424 } else if (!lapic_use_tscdeadline) { 425 lapic_timer_set_divisor(lapic_timer_divisor_idx); 426 } 427 428 if (bootverbose) 429 apic_dump("apic_initialize()"); 430 } 431 432 static void 433 lapic_timer_set_divisor(int divisor_idx) 434 { 435 KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS); 436 LAPIC_WRITE(dcr_timer, lapic_timer_divisors[divisor_idx]); 437 } 438 439 static void 440 lapic_timer_oneshot(u_int count) 441 { 442 uint32_t value; 443 444 value = LAPIC_READ(lvt_timer); 445 value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT); 446 LAPIC_WRITE(lvt_timer, value); 447 LAPIC_WRITE(icr_timer, count); 448 } 449 450 static void 451 lapic_timer_oneshot_quick(u_int count) 452 { 453 LAPIC_WRITE(icr_timer, count); 454 } 455 456 static void 457 lapic_timer_tscdeadline_quick(uint64_t diff) 458 { 459 uint64_t val = rdtsc() + diff; 460 461 wrmsr(MSR_TSC_DEADLINE, val); 462 tsc_deadlines[mycpuid].timestamp = val; 463 } 464 465 static uint64_t 466 lapic_scale_to_tsc(unsigned value, unsigned scale) 467 { 468 uint64_t val; 469 470 val = value; 471 val *= tsc_frequency; 472 val += (scale - 1); 473 val /= scale; 474 return val; 475 } 476 477 #define MAX_MEASURE_RETRIES 100 478 479 static u_int64_t 480 do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc) 481 { 482 u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2; 483 u_int64_t diff, count; 484 u_int64_t a; 485 u_int32_t start, end; 486 int retries1 = 0, retries2 = 0; 487 488 retry1: 489 lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT); 490 old_tsc1 = rdtsc_ordered(); 491 start = LAPIC_READ(ccr_timer); 492 old_tsc2 = rdtsc_ordered(); 493 if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES && 494 old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) { 495 retries1++; 496 goto retry1; 497 } 498 DELAY(us); 499 retry2: 500 new_tsc1 = rdtsc_ordered(); 501 end = LAPIC_READ(ccr_timer); 502 new_tsc2 = rdtsc_ordered(); 503 if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES && 504 new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) { 505 retries2++; 506 goto retry2; 507 } 508 if (end == 0) 509 return 0; 510 511 count = start - end; 512 513 /* Make sure the lapic can count for up to 2s */ 514 a = (unsigned)APIC_TIMER_MAX_COUNT; 515 if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us) 516 return 0; 517 518 if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) { 519 kprintf("%s: retries1=%d retries2=%d\n", 520 __func__, retries1, retries2); 521 } 522 523 diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2); 524 /* XXX First estimate if the total TSC diff value makes sense */ 525 /* This will almost overflow, but only almost :) */ 526 count = (2 * count * tsc_frequency) / diff; 527 528 return count; 529 } 530 531 static uint64_t 532 do_cputimer_calibration(u_int us) 533 { 534 sysclock_t value; 535 sysclock_t start, end, beginning, finish; 536 537 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); 538 beginning = LAPIC_READ(ccr_timer); 539 start = sys_cputimer->count(); 540 DELAY(us); 541 end = sys_cputimer->count(); 542 finish = LAPIC_READ(ccr_timer); 543 if (finish == 0) 544 return 0; 545 /* value is the LAPIC timer difference. */ 546 value = beginning - finish; 547 /* end is the sys_cputimer difference. */ 548 end -= start; 549 if (end == 0) 550 return 0; 551 value = ((uint64_t)value * sys_cputimer->freq) / end; 552 return value; 553 } 554 555 static void 556 lapic_timer_calibrate(void) 557 { 558 sysclock_t value; 559 u_int64_t apic_delay_tsc = 0; 560 int use_tsc_calibration = 0; 561 562 /* No need to calibrate lapic_timer, if we will use TSC Deadline mode */ 563 if (lapic_use_tscdeadline) { 564 lapic_timer_tscfreq_shift = 0; 565 while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX) 566 lapic_timer_tscfreq_shift++; 567 lapic_cputimer_intr.freq = 568 tsc_frequency >> lapic_timer_tscfreq_shift; 569 kprintf( 570 "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n", 571 lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq); 572 return; 573 } 574 575 /* 576 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in 577 * a virtual machine the frequency may get changed by the host. 578 */ 579 if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast) 580 use_tsc_calibration = 1; 581 582 if (use_tsc_calibration) { 583 u_int64_t min_apic_tsc = 0, max_apic_tsc = 0; 584 u_int64_t old_tsc, new_tsc; 585 sysclock_t val; 586 int i; 587 588 /* warm up */ 589 lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); 590 for (i = 0; i < 10; i++) 591 val = LAPIC_READ(ccr_timer); 592 593 for (i = 0; i < 100; i++) { 594 old_tsc = rdtsc_ordered(); 595 val = LAPIC_READ(ccr_timer); 596 new_tsc = rdtsc_ordered(); 597 new_tsc -= old_tsc; 598 apic_delay_tsc += new_tsc; 599 if (min_apic_tsc == 0 || 600 min_apic_tsc > new_tsc) { 601 min_apic_tsc = new_tsc; 602 } 603 if (max_apic_tsc < new_tsc) 604 max_apic_tsc = new_tsc; 605 } 606 apic_delay_tsc /= 100; 607 kprintf( 608 "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n", 609 apic_delay_tsc, min_apic_tsc, max_apic_tsc); 610 apic_delay_tsc = min_apic_tsc; 611 } 612 613 if (!use_tsc_calibration) { 614 int i; 615 616 /* 617 * Do some exercising of the lapic timer access. This improves 618 * precision of the subsequent calibration run in at least some 619 * virtualization cases. 620 */ 621 lapic_timer_set_divisor(0); 622 for (i = 0; i < 10; i++) 623 (void)do_cputimer_calibration(100); 624 } 625 /* Try to calibrate the local APIC timer. */ 626 for (lapic_timer_divisor_idx = 0; 627 lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS; 628 lapic_timer_divisor_idx++) { 629 lapic_timer_set_divisor(lapic_timer_divisor_idx); 630 if (use_tsc_calibration) { 631 value = do_tsc_calibration(200*1000, apic_delay_tsc); 632 } else { 633 value = do_cputimer_calibration(2*1000*1000); 634 } 635 if (value != 0) 636 break; 637 } 638 if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS) 639 panic("lapic: no proper timer divisor?!"); 640 lapic_cputimer_intr.freq = value; 641 642 kprintf("lapic: divisor index %d, frequency %u Hz\n", 643 lapic_timer_divisor_idx, lapic_cputimer_intr.freq); 644 645 if (lapic_calibrate_test > 0) { 646 uint64_t freq; 647 int i; 648 649 for (i = 1; i <= 20; i++) { 650 if (use_tsc_calibration) { 651 freq = do_tsc_calibration(i*100*1000, 652 apic_delay_tsc); 653 } else { 654 freq = do_cputimer_calibration(i*100*1000); 655 } 656 if (freq != 0) 657 kprintf("%ums: %lu\n", i * 100, freq); 658 } 659 } 660 } 661 662 static void 663 lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload) 664 { 665 struct globaldata *gd = mycpu; 666 uint64_t diff, now, val; 667 668 if (reload > 1000*1000*1000) 669 reload = 1000*1000*1000; 670 diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq; 671 if (diff < 4) 672 diff = 4; 673 if (cpu_vendor_id == CPU_VENDOR_INTEL) 674 cpu_lfence(); 675 else 676 cpu_mfence(); 677 now = rdtsc(); 678 val = now + diff; 679 if (gd->gd_timer_running) { 680 uint64_t deadline = tsc_deadlines[mycpuid].timestamp; 681 if (deadline == 0 || now > deadline || val < deadline) { 682 wrmsr(MSR_TSC_DEADLINE, val); 683 tsc_deadlines[mycpuid].timestamp = val; 684 } 685 } else { 686 gd->gd_timer_running = 1; 687 wrmsr(MSR_TSC_DEADLINE, val); 688 tsc_deadlines[mycpuid].timestamp = val; 689 } 690 } 691 692 static void 693 lapic_mem_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload) 694 { 695 struct globaldata *gd = mycpu; 696 697 reload = (int64_t)reload * cti->freq / sys_cputimer->freq; 698 if (reload < 2) 699 reload = 2; 700 701 if (gd->gd_timer_running) { 702 if (reload < LAPIC_MEM_READ(ccr_timer)) 703 LAPIC_MEM_WRITE(icr_timer, reload); 704 } else { 705 gd->gd_timer_running = 1; 706 LAPIC_MEM_WRITE(icr_timer, reload); 707 } 708 } 709 710 static void 711 lapic_msr_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload) 712 { 713 struct globaldata *gd = mycpu; 714 715 reload = (int64_t)reload * cti->freq / sys_cputimer->freq; 716 if (reload < 2) 717 reload = 2; 718 719 if (gd->gd_timer_running) { 720 if (reload < LAPIC_MSR_READ(MSR_X2APIC_CCR_TIMER)) 721 LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload); 722 } else { 723 gd->gd_timer_running = 1; 724 LAPIC_MSR_WRITE(MSR_X2APIC_ICR_TIMER, reload); 725 } 726 } 727 728 static void 729 lapic_timer_intr_enable(struct cputimer_intr *cti __unused) 730 { 731 uint32_t timer; 732 733 timer = LAPIC_READ(lvt_timer); 734 timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT); 735 if (lapic_use_tscdeadline) 736 timer |= APIC_LVTT_TSCDLT; 737 LAPIC_WRITE(lvt_timer, timer); 738 if (lapic_use_tscdeadline) 739 cpu_mfence(); 740 741 lapic_timer_fixup_handler(NULL); 742 } 743 744 static void 745 lapic_timer_fixup_handler(void *arg) 746 { 747 int *started = arg; 748 749 if (started != NULL) 750 *started = 0; 751 752 if (cpu_vendor_id == CPU_VENDOR_AMD) { 753 /* 754 * Detect the presence of C1E capability mostly on latest 755 * dual-cores (or future) k8 family. This feature renders 756 * the local APIC timer dead, so we disable it by reading 757 * the Interrupt Pending Message register and clearing both 758 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). 759 * 760 * Reference: 761 * "BIOS and Kernel Developer's Guide for AMD NPT 762 * Family 0Fh Processors" 763 * #32559 revision 3.00 764 */ 765 if ((cpu_id & 0x00000f00) == 0x00000f00 && 766 (cpu_id & 0x0fff0000) >= 0x00040000) { 767 uint64_t msr; 768 769 msr = rdmsr(0xc0010055); 770 if (msr & 0x18000000) { 771 struct globaldata *gd = mycpu; 772 773 kprintf("cpu%d: AMD C1E detected\n", 774 gd->gd_cpuid); 775 wrmsr(0xc0010055, msr & ~0x18000000ULL); 776 777 /* 778 * We are kinda stalled; 779 * kick start again. 780 */ 781 gd->gd_timer_running = 1; 782 if (lapic_use_tscdeadline) { 783 /* Maybe reached in Virtual Machines? */ 784 lapic_timer_tscdeadline_quick(5000); 785 } else { 786 lapic_timer_oneshot_quick(2); 787 } 788 789 if (started != NULL) 790 *started = 1; 791 } 792 } 793 } 794 } 795 796 static void 797 lapic_timer_restart_handler(void *dummy __unused) 798 { 799 int started; 800 801 lapic_timer_fixup_handler(&started); 802 if (!started) { 803 struct globaldata *gd = mycpu; 804 805 gd->gd_timer_running = 1; 806 if (lapic_use_tscdeadline) { 807 /* Maybe reached in Virtual Machines? */ 808 lapic_timer_tscdeadline_quick(5000); 809 } else { 810 lapic_timer_oneshot_quick(2); 811 } 812 } 813 } 814 815 /* 816 * This function is called only by ACPICA code currently: 817 * - AMD C1E fixup. AMD C1E only seems to happen after ACPI 818 * module controls PM. So once ACPICA is attached, we try 819 * to apply the fixup to prevent LAPIC timer from hanging. 820 */ 821 static void 822 lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused) 823 { 824 lwkt_send_ipiq_mask(smp_active_mask, 825 lapic_timer_fixup_handler, NULL); 826 } 827 828 static void 829 lapic_timer_intr_restart(struct cputimer_intr *cti __unused) 830 { 831 lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL); 832 } 833 834 835 /* 836 * dump contents of local APIC registers 837 */ 838 void 839 apic_dump(char* str) 840 { 841 kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str); 842 kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", 843 LAPIC_READ(lvt_lint0), LAPIC_READ(lvt_lint1), LAPIC_READ(tpr), 844 LAPIC_READ(svr)); 845 } 846 847 /* 848 * Inter Processor Interrupt functions. 849 */ 850 851 static __inline void 852 lapic_mem_icr_unpend(const char *func) 853 { 854 if (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) { 855 int64_t tsc; 856 int loops = 1; 857 858 tsc = rdtsc(); 859 while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) { 860 cpu_pause(); 861 if ((tsc_sclock_t)(rdtsc() - 862 (tsc + tsc_frequency)) > 0) { 863 tsc = rdtsc(); 864 if (++loops > 30) { 865 panic("%s: cpu%d apic stalled", 866 func, mycpuid); 867 } else { 868 kprintf("%s: cpu%d apic stalled\n", 869 func, mycpuid); 870 } 871 } 872 } 873 } 874 } 875 876 /* 877 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'. 878 * 879 * destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF 880 * vector is any valid SYSTEM INT vector 881 * delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO 882 * 883 * WARNINGS! 884 * 885 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than 886 * one IPI from being sent to any given cpu at a time. Thus we no longer 887 * have to process incoming IPIs while waiting for the status to clear. 888 * No deadlock should be possible. 889 * 890 * We now physically disable interrupts for the lapic ICR operation. If 891 * we do not do this then it looks like an EOI sent to the lapic (which 892 * occurs even with a critical section) can interfere with the command 893 * register ready status and cause an IPI to be lost. 894 * 895 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command 896 * register to busy just before we write to icr_lo, resulting in a lost 897 * issuance. This only appears to occur on Intel cpus and is not 898 * documented. It could simply be that cpus are so fast these days that 899 * it was always an issue, but is only now rearing its ugly head. This 900 * is conjecture. 901 */ 902 static int 903 lapic_mem_ipi(int dest_type, int vector, int delivery_mode) 904 { 905 lapic_mem_icr_unpend(__func__); 906 lapic_mem_icr_set(0, 907 dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector); 908 return 0; 909 } 910 911 static int 912 lapic_msr_ipi(int dest_type, int vector, int delivery_mode) 913 { 914 lapic_msr_icr_set(0, 915 dest_type | APIC_LEVEL_ASSERT | delivery_mode | vector); 916 return 0; 917 } 918 919 /* 920 * Interrupts must be hard-disabled by caller 921 */ 922 static void 923 lapic_mem_single_ipi(int cpu, int vector, int delivery_mode) 924 { 925 lapic_mem_icr_unpend(__func__); 926 lapic_mem_icr_set(CPUID_TO_APICID(cpu), 927 APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector); 928 } 929 930 static void 931 lapic_msr_single_ipi(int cpu, int vector, int delivery_mode) 932 { 933 lapic_msr_icr_set(CPUID_TO_APICID(cpu), 934 APIC_DEST_DESTFLD | APIC_LEVEL_ASSERT | delivery_mode | vector); 935 } 936 937 /* 938 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'. 939 * 940 * target is a bitmask of destination cpus. Vector is any 941 * valid system INT vector. Delivery mode may be either 942 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO. 943 * 944 * Interrupts must be hard-disabled by caller 945 */ 946 void 947 selected_apic_ipi(cpumask_t target, int vector, int delivery_mode) 948 { 949 while (CPUMASK_TESTNZERO(target)) { 950 int n = BSFCPUMASK(target); 951 CPUMASK_NANDBIT(target, n); 952 single_apic_ipi(n, vector, delivery_mode); 953 } 954 } 955 956 /* 957 * Load a 'downcount time' in uSeconds. 958 */ 959 void 960 set_apic_timer(int us) 961 { 962 u_int count; 963 964 if (lapic_use_tscdeadline) { 965 uint64_t val; 966 967 val = lapic_scale_to_tsc(us, 1000000); 968 val += rdtsc(); 969 /* No need to arm the lapic here, just track the timeout. */ 970 tsc_deadlines[mycpuid].downcount_time = val; 971 return; 972 } 973 974 /* 975 * When we reach here, lapic timer's frequency 976 * must have been calculated as well as the 977 * divisor (lapic->dcr_timer is setup during the 978 * divisor calculation). 979 */ 980 KKASSERT(lapic_cputimer_intr.freq != 0 && 981 lapic_timer_divisor_idx >= 0); 982 983 count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000; 984 lapic_timer_oneshot(count); 985 } 986 987 988 /* 989 * Read remaining time in timer, in microseconds (rounded up). 990 */ 991 int 992 read_apic_timer(void) 993 { 994 uint64_t val; 995 996 if (lapic_use_tscdeadline) { 997 uint64_t now; 998 999 val = tsc_deadlines[mycpuid].downcount_time; 1000 now = rdtsc(); 1001 if (val == 0 || now > val) { 1002 return 0; 1003 } else { 1004 val -= now; 1005 val *= 1000000; 1006 val += (tsc_frequency - 1); 1007 val /= tsc_frequency; 1008 if (val > INT_MAX) 1009 val = INT_MAX; 1010 return val; 1011 } 1012 } 1013 1014 val = LAPIC_READ(ccr_timer); 1015 if (val == 0) 1016 return 0; 1017 1018 KKASSERT(lapic_cputimer_intr.freq > 0); 1019 val *= 1000000; 1020 val += (lapic_cputimer_intr.freq - 1); 1021 val /= lapic_cputimer_intr.freq; 1022 if (val > INT_MAX) 1023 val = INT_MAX; 1024 return val; 1025 } 1026 1027 1028 /* 1029 * Spin-style delay, set delay time in uS, spin till it drains. 1030 */ 1031 void 1032 u_sleep(int count) 1033 { 1034 set_apic_timer(count); 1035 while (read_apic_timer()) 1036 /* spin */ ; 1037 } 1038 1039 int 1040 lapic_unused_apic_id(int start) 1041 { 1042 int i; 1043 1044 for (i = start; i < APICID_MAX; ++i) { 1045 if (APICID_TO_CPUID(i) == -1) 1046 return i; 1047 } 1048 return NAPICID; 1049 } 1050 1051 void 1052 lapic_map(vm_paddr_t lapic_addr) 1053 { 1054 lapic_mem = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC)); 1055 } 1056 1057 void 1058 lapic_x2apic_enter(boolean_t bsp) 1059 { 1060 uint64_t apic_base; 1061 1062 KASSERT(x2apic_enable, ("X2APIC mode is not enabled")); 1063 1064 /* 1065 * X2APIC mode is requested, if it has not been enabled by the BIOS, 1066 * enable it now. 1067 */ 1068 apic_base = rdmsr(MSR_APICBASE); 1069 if ((apic_base & APICBASE_X2APIC) == 0) { 1070 wrmsr(MSR_APICBASE, 1071 apic_base | APICBASE_X2APIC | APICBASE_ENABLED); 1072 } 1073 if (bsp) { 1074 lapic_eoi = lapic_msr_eoi; 1075 apic_ipi = lapic_msr_ipi; 1076 single_apic_ipi = lapic_msr_single_ipi; 1077 lapic_cputimer_intr.reload = lapic_msr_timer_intr_reload; 1078 } 1079 } 1080 1081 static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators = 1082 TAILQ_HEAD_INITIALIZER(lapic_enumerators); 1083 1084 int 1085 lapic_config(void) 1086 { 1087 struct lapic_enumerator *e; 1088 uint64_t apic_base; 1089 int error, i, ap_max; 1090 1091 KKASSERT(lapic_enable); 1092 1093 lapic_eoi = lapic_mem_eoi; 1094 apic_ipi = lapic_mem_ipi; 1095 single_apic_ipi = lapic_mem_single_ipi; 1096 1097 TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_enable); 1098 if (x2apic_enable < 0) 1099 x2apic_enable = 1; 1100 1101 if ((cpu_feature2 & CPUID2_X2APIC) == 0) { 1102 /* X2APIC is not supported. */ 1103 x2apic_enable = 0; 1104 } else if (!x2apic_enable) { 1105 /* 1106 * If the BIOS enabled the X2APIC mode, then we would stick 1107 * with the X2APIC mode. 1108 */ 1109 apic_base = rdmsr(MSR_APICBASE); 1110 if (apic_base & APICBASE_X2APIC) { 1111 kprintf("LAPIC: BIOS enabled X2APIC mode\n"); 1112 x2apic_enable = 1; 1113 } 1114 } 1115 1116 if (x2apic_enable) { 1117 /* 1118 * Enter X2APIC mode. 1119 */ 1120 kprintf("LAPIC: enter X2APIC mode\n"); 1121 lapic_x2apic_enter(TRUE); 1122 } 1123 1124 for (i = 0; i < NAPICID; ++i) 1125 APICID_TO_CPUID(i) = -1; 1126 1127 TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) { 1128 error = e->lapic_probe(e); 1129 if (!error) 1130 break; 1131 } 1132 if (e == NULL) { 1133 kprintf("LAPIC: Can't find LAPIC\n"); 1134 return ENXIO; 1135 } 1136 1137 error = e->lapic_enumerate(e); 1138 if (error) { 1139 kprintf("LAPIC: enumeration failed\n"); 1140 return ENXIO; 1141 } 1142 1143 /* LAPIC is usable now. */ 1144 lapic_usable = 1; 1145 1146 ap_max = MAXCPU - 1; 1147 TUNABLE_INT_FETCH("hw.ap_max", &ap_max); 1148 if (ap_max > MAXCPU - 1) 1149 ap_max = MAXCPU - 1; 1150 1151 if (naps > ap_max) { 1152 kprintf("LAPIC: Warning use only %d out of %d " 1153 "available APs\n", 1154 ap_max, naps); 1155 naps = ap_max; 1156 } 1157 1158 return 0; 1159 } 1160 1161 void 1162 lapic_enumerator_register(struct lapic_enumerator *ne) 1163 { 1164 struct lapic_enumerator *e; 1165 1166 TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) { 1167 if (e->lapic_prio < ne->lapic_prio) { 1168 TAILQ_INSERT_BEFORE(e, ne, lapic_link); 1169 return; 1170 } 1171 } 1172 TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link); 1173 } 1174 1175 void 1176 lapic_set_cpuid(int cpu_id, int apic_id) 1177 { 1178 CPUID_TO_APICID(cpu_id) = apic_id; 1179 APICID_TO_CPUID(apic_id) = cpu_id; 1180 } 1181 1182 void 1183 lapic_fixup_noioapic(void) 1184 { 1185 u_int temp; 1186 1187 /* Only allowed on BSP */ 1188 KKASSERT(mycpuid == 0); 1189 KKASSERT(!ioapic_enable); 1190 1191 temp = LAPIC_READ(lvt_lint0); 1192 temp &= ~APIC_LVT_MASKED; 1193 LAPIC_WRITE(lvt_lint0, temp); 1194 1195 temp = LAPIC_READ(lvt_lint1); 1196 temp |= APIC_LVT_MASKED; 1197 LAPIC_WRITE(lvt_lint1, temp); 1198 } 1199 1200 static void 1201 lapic_mem_eoi(void) 1202 { 1203 log_lapic(mem_eoi); 1204 LAPIC_MEM_WRITE(eoi, 0); 1205 } 1206 1207 static void 1208 lapic_msr_eoi(void) 1209 { 1210 log_lapic(msr_eoi); 1211 LAPIC_MSR_WRITE(MSR_X2APIC_EOI, 0); 1212 } 1213 1214 static void 1215 lapic_mem_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val) 1216 { 1217 lapic_mem_icr_set(apic_id, icr_lo_val); 1218 while (LAPIC_MEM_READ(icr_lo) & APIC_DELSTAT_PEND) 1219 /* spin */; 1220 } 1221 1222 void 1223 lapic_seticr_sync(uint32_t apic_id, uint32_t icr_lo_val) 1224 { 1225 if (x2apic_enable) 1226 lapic_msr_icr_set(apic_id, icr_lo_val); 1227 else 1228 lapic_mem_seticr_sync(apic_id, icr_lo_val); 1229 } 1230 1231 static void 1232 lapic_sysinit(void *dummy __unused) 1233 { 1234 if (lapic_enable) { 1235 int error; 1236 1237 error = lapic_config(); 1238 if (error) 1239 lapic_enable = 0; 1240 } 1241 if (!lapic_enable) 1242 x2apic_enable = 0; 1243 1244 if (lapic_enable) { 1245 /* Initialize BSP's local APIC */ 1246 lapic_init(TRUE); 1247 } else if (ioapic_enable) { 1248 ioapic_enable = 0; 1249 icu_reinit_noioapic(); 1250 } 1251 } 1252 SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL); 1253