1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/machintr.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>

/* KTR tracing for LAPIC events (currently only the EOI write). */
#if !defined(KTR_LAPIC)
#define KTR_LAPIC	KTR_ALL
#endif
KTR_INFO_MASTER(lapic);
KTR_INFO(KTR_LAPIC, lapic, eoi, 0, "eoi");
#define log_lapic(name)	KTR_LOG(lapic_ ## name)

/* Number of application processors, set up elsewhere during enumeration. */
extern int naps;

/* Memory-mapped local APIC register window, established by lapic_map(). */
volatile lapic_t *lapic;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);


/* Tunable: use the LAPIC timer as the interrupt cputimer (default on). */
static int lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

/* Tunable: prefer TSC-Deadline mode when the CPU supports it. */
static int lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

/* Tunable: run extra calibration passes and print their results. */
static int lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

/* Tunable: calibrate against the TSC (fast) rather than sys_cputimer. */
static int lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

/*
 * The LAPIC interrupt cputimer.  .freq and (possibly) .reload are
 * fixed up during calibration in lapic_init()/lapic_timer_calibrate().
 */
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

/*
 * Divide Configuration Register values, ordered so calibration tries
 * the smallest divisor first; -1 means "not calibrated yet".
 */
static int lapic_timer_divisor_idx = -1;
static const uint32_t lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))

/* Non-zero once lapic_init() decides to run the timer in TSC-Deadline mode. */
static int lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;

/* Separate cachelines for each cpu's info. */
struct deadlines {
	uint64_t timestamp;	/* last value written to MSR_TSC_DEADLINE */
	uint64_t downcount_time; /* absolute TSC deadline for set_apic_timer() */
	uint64_t padding[6];
};
struct deadlines *tsc_deadlines = NULL;

static void	lapic_eoi_func(void);

/* Indirect EOI entry point, pointed at lapic_eoi_func() in lapic_sysinit(). */
void		(*lapic_eoi)(void);

/*
 * Enable LAPIC, configure interrupts.
 *
 * Called once on the BSP (bsp == TRUE) and once on each AP.  IDT vector
 * installation and the TSC-Deadline mode decision are BSP-only; LVT and
 * SVR programming happens on every cpu.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int   temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			/* one cacheline-aligned slot per cpu (BSP + APs) */
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) | /* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic->lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	lapic->lvt_lint0 = temp;

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = lapic->lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = lapic->lvt_timer;
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	lapic->lvt_timer = timer;

	/*
	 * Set the Task Priority Register as needed.   At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = lapic->tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	lapic->tpr = temp;

	/*
	 * AMD specific setup
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		/* Mask every implemented AMD extended LVT entry. */
		ext_feat = lapic->ext_feat;
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = lapic->ext_lvt[i].lvt;

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf("   LAPIC AMD elvt%d: 0x%08x",
					i, lapic->ext_lvt[i].lvt);
				if (lapic->ext_lvt[i].lvt != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			lapic->ext_lvt[i].lvt = lvt;
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = lapic->svr;
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	if (lapic->version & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf("    LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic->svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic->eoi = 0;
	lapic->eoi = 0;
	lapic->eoi = 0;

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* APs reuse the divisor calibrated on the BSP. */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}

/*
 * Program the timer's Divide Configuration Register from the
 * lapic_timer_divisors[] table.
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
}

/*
 * Start a one-shot countdown: force one-shot mode (clear periodic and
 * TSC-deadline bits) and load the initial count.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = lapic->lvt_timer;
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}

/*
 * Reload the initial count only; assumes one-shot mode is already
 * programmed in lvt_timer.
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	lapic->icr_timer = count;
}

/*
 * Arm a TSC-deadline 'diff' TSC ticks from now and record the deadline
 * in this cpu's tsc_deadlines slot.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

/*
 * Convert value/scale seconds into TSC ticks, rounding up.
 */
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val += (scale - 1);
	val /= scale;
	return val;
}

#define MAX_MEASURE_RETRIES	100

/*
 * Measure the LAPIC timer frequency (in counts/second) against the TSC
 * over a DELAY() of 'us' microseconds.  Each ccr_timer read is bracketed
 * by rdtsc_ordered(); a sample is retried (up to MAX_MEASURE_RETRIES)
 * when the bracketing shows the MMIO read took more than twice the
 * expected latency 'apic_delay_tsc'.  Returns 0 if the measurement is
 * unusable (timer wrapped, or the divisor can't cover ~2 seconds).
 */
static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = lapic->ccr_timer;
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = lapic->ccr_timer;
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}

/*
 * Measure the LAPIC timer frequency against sys_cputimer over a
 * DELAY() of 'us' microseconds.  Returns 0 if the timer wrapped or
 * sys_cputimer did not advance.
 */
static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = lapic->ccr_timer;
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = lapic->ccr_timer;
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}

/*
 * Determine lapic_cputimer_intr.freq.  In TSC-Deadline mode this is just
 * the (shifted) TSC frequency; otherwise try each divisor in turn until
 * a calibration run succeeds, leaving the chosen index in
 * lapic_timer_divisor_idx for the APs.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		sysclock_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = lapic->ccr_timer;

		/* Estimate typical ccr_timer MMIO read latency in TSC ticks. */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = lapic->ccr_timer;
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least some
		 * virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		/* Re-measure at increasing intervals and report each result. */
		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}

/*
 * cputimer_intr reload method for TSC-Deadline mode.  Converts the
 * sysclock reload count to TSC ticks and only rewrites the deadline MSR
 * when it would move the pending deadline earlier (or none is pending).
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	/* clamp the reload request to one second */
	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	/* fence so the rdtsc below is not speculated ahead */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		/* rearm only if expired, unset, or the new deadline is sooner */
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

/*
 * cputimer_intr reload method for one-shot count mode.  Converts the
 * sysclock reload count to LAPIC timer ticks; only shortens a countdown
 * that is already running.
 */
static void
lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < lapic->ccr_timer)
			lapic_timer_oneshot_quick(reload);
	} else {
		gd->gd_timer_running = 1;
		lapic_timer_oneshot_quick(reload);
	}
}

/*
 * cputimer_intr enable method: unmask the timer LVT, selecting
 * TSC-deadline or one-shot mode as decided at init time, then run the
 * AMD C1E fixup.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = lapic->lvt_timer;
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	lapic->lvt_timer = timer;
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Work around AMD C1E, which stops the LAPIC timer.  If 'arg' is
 * non-NULL it points to an int that is set to 1 when the fixup also
 * restarted the timer on this cpu (so callers know not to restart it
 * again), else 0.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family. This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu restart handler: apply the C1E fixup and, if that did not
 * already restart the timer, kick it with a minimal countdown.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
			    lapic_timer_fixup_handler, NULL);
}

/*
 * cputimer_intr restart method: run the restart handler on all active
 * cpus via IPI.
 */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
int
apic_ipi(int dest_type, int vector, int delivery_mode)
{
	uint32_t icr_hi;
	uint32_t icr_lo;
	int64_t tsc;
	int loops = 1;

	/* wait for any previous IPI delivery to complete */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			/* complain roughly once per TSC-second of stall */
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
		 APIC_LEVEL_ASSERT | delivery_mode | vector;
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;	/* writing icr_lo sends the IPI */

	return 0;
}

/*
 * Send an IPI 'vector' to a single cpu via 'delivery_mode'.
 *
 * Interrupts must be hard-disabled by caller
 */
void
single_apic_ipi(int cpu, int vector, int delivery_mode)
{
	uint32_t icr_lo;
	uint32_t icr_hi;
	int64_t tsc;
	int loops = 1;

	/* wait for any previous IPI delivery to complete */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			/* complain roughly once per TSC-second of stall */
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("single_apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
		 APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;
}

#if 0

/*
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	u_long  icr_lo;
	u_long  icr_hi;
	unsigned long rflags;

	rflags = read_rflags();
	cpu_disable_intr();
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		write_rflags(rflags);
		return(0);
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
	lapic->icr_hi = icr_hi;

	/* build IRC_LOW */
	icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) |
		 APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_lo = icr_lo;
	write_rflags(rflags);

	return(1);
}

#endif

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	/* peel off one destination cpu at a time */
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	/* convert microseconds to timer ticks, rounding up */
	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}


/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			/* convert remaining TSC ticks to microseconds */
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = lapic->ccr_timer;
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	/* convert remaining timer ticks to microseconds */
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

/*
 * Return the first APIC ID >= 'start' not yet assigned to a cpu,
 * or NAPICID if none is free.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/*
 * Map the LAPIC register page uncacheable and point 'lapic' at it.
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe the registered enumerators (in priority order) and run the
 * first one that succeeds.  Also clamps the number of usable APs to
 * the hw.ap_max tunable.  Returns 0 on success, ENXIO on failure.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Insert an enumerator into the list, ordered by descending priority.
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/*
 * Record a bidirectional cpu id <-> APIC id mapping.
 */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * Adjust LINT0/LINT1 for operation without an I/O APIC: unmask LINT0
 * (ExtINT from the 8259) and mask LINT1.  BSP only.
 */
void
lapic_fixup_noioapic(void)
{
	u_int   temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = lapic->lvt_lint0;
	temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint0 = temp;

	temp = lapic->lvt_lint1;
	temp |= APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;
}

/*
 * Signal end-of-interrupt to the local APIC.
 */
static void
lapic_eoi_func(void)
{
	log_lapic(eoi);
	lapic->eoi = 0;
}

/*
 * Boot-time hookup: install the EOI function, configure the LAPIC and,
 * on success, initialize the BSP's local APIC.  If LAPIC configuration
 * fails, fall back to pure ICU (8259) operation.
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		lapic_eoi = lapic_eoi_func;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);