/*
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/machintr.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>

/* Number of application processors (APs), i.e. cpus other than the BSP. */
extern int naps;

/* Memory-mapped view of the local APIC register page (see lapic_map()). */
volatile lapic_t *lapic;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);


/* Tunable: use the LAPIC timer as a cputimer interrupt source. */
static int	lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

/* Tunable: prefer TSC-deadline mode over one-shot count-down mode. */
static int	lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

/* Tunable: when >0, run extra calibration passes and print the results. */
static int	lapic_calibrate_test = 0;
TUNABLE_INT("hw.lapic_calibrate_test", &lapic_calibrate_test);

/* Tunable: calibrate against the TSC (fast) instead of sys_cputimer. */
static int	lapic_calibrate_fast = 1;
TUNABLE_INT("hw.lapic_calibrate_fast", &lapic_calibrate_fast);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

/*
 * LAPIC cputimer interrupt source descriptor.  .freq is filled in by
 * lapic_timer_calibrate(); .reload is switched to the TSC-deadline
 * variant in lapic_init() when that mode is usable.
 */
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

/*
 * Divide-configuration values tried in order by lapic_timer_calibrate();
 * the index of the first divisor that calibrates successfully is kept in
 * lapic_timer_divisor_idx (-1 until calibration has run).
 */
static int		lapic_timer_divisor_idx = -1;
static const uint32_t	lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))

/* Non-zero once TSC-deadline mode has been selected on the BSP. */
static int lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;

/*
 * Per-cpu TSC-deadline bookkeeping.
 * Separate cachelines for each cpu's info.
 */
struct deadlines {
	uint64_t timestamp;	 /* last deadline written to MSR_TSC_DEADLINE */
	uint64_t downcount_time; /* absolute TSC deadline for set_apic_timer() */
	uint64_t padding[6];
};
struct deadlines *tsc_deadlines = NULL;

/*
 * Enable LAPIC, configure interrupts.
 *
 * Called once with bsp==TRUE on the boot processor (installs IDT vectors,
 * calibrates and registers the timer) and once per AP with bsp==FALSE.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int   temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) | /* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic->lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	lapic->lvt_lint0 = temp;

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 */
	temp = lapic->lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 */
	timer = lapic->lvt_timer;
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	lapic->lvt_timer = timer;

	/*
	 * Set the Task Priority Register as needed.   At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = lapic->tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	lapic->tpr = temp;

	/*
	 * AMD specific setup
	 *
	 * Mask the extended LVT entries (IBS, MCA thresholding, etc.) if
	 * the AMD extended APIC register space is present.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		ext_feat = lapic->ext_feat;
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = lapic->ext_lvt[i].lvt;

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			/*
			 * No per-entry customization yet; all extended
			 * LVT entries end up masked with fixed delivery.
			 */
			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			if (bsp) {
				kprintf("   LAPIC AMD elvt%d: 0x%08x",
					i, lapic->ext_lvt[i].lvt);
				if (lapic->ext_lvt[i].lvt != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			lapic->ext_lvt[i].lvt = lvt;
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = lapic->svr;
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	/*
	 * If directed-EOI is supported, make sure it stays disabled so a
	 * plain EOI write always propagates to the I/O APIC.
	 */
	if (lapic->version & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf("   LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic->svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic->eoi = 0;
	lapic->eoi = 0;
	lapic->eoi = 0;

	if (bsp) {
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* APs reuse the divisor the BSP settled on during calibration. */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}

/*
 * Program the LAPIC timer divide-configuration register from the
 * lapic_timer_divisors[] table.
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
}

/*
 * Arm the LAPIC timer in one-shot count-down mode for 'count' ticks,
 * clearing periodic and TSC-deadline mode bits first.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = lapic->lvt_timer;
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}

/*
 * Re-arm the one-shot timer without touching the LVT; assumes the timer
 * mode bits are already configured (see lapic_timer_intr_enable()).
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	lapic->icr_timer = count;
}

/*
 * Arm the TSC-deadline timer 'diff' TSC ticks from now and record the
 * absolute deadline for this cpu.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

/*
 * Convert 'value' units of 1/scale seconds into TSC ticks, rounding up.
 */
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val += (scale - 1);
	val /= scale;
	return val;
}

#define MAX_MEASURE_RETRIES	100

/*
 * Measure the LAPIC timer frequency (in Hz) against the TSC over a DELAY()
 * of 'us' microseconds.  'apic_delay_tsc' is the typical TSC cost of one
 * ccr_timer read; samples whose read latency exceeds twice that are retried
 * (up to MAX_MEASURE_RETRIES) to filter out interrupt/SMI disturbance.
 * Returns 0 when the measurement should be rejected (timer wrapped, or
 * this divisor cannot cover a 2 second period).
 */
static u_int64_t
do_tsc_calibration(u_int us, u_int64_t apic_delay_tsc)
{
	u_int64_t old_tsc1, old_tsc2, new_tsc1, new_tsc2;
	u_int64_t diff, count;
	u_int64_t a;
	u_int32_t start, end;
	int retries1 = 0, retries2 = 0;

retry1:
	lapic_timer_oneshot_quick(APIC_TIMER_MAX_COUNT);
	old_tsc1 = rdtsc_ordered();
	start = lapic->ccr_timer;
	old_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries1 < MAX_MEASURE_RETRIES &&
	    old_tsc2 - old_tsc1 > 2 * apic_delay_tsc) {
		retries1++;
		goto retry1;
	}
	DELAY(us);
retry2:
	new_tsc1 = rdtsc_ordered();
	end = lapic->ccr_timer;
	new_tsc2 = rdtsc_ordered();
	if (apic_delay_tsc > 0 && retries2 < MAX_MEASURE_RETRIES &&
	    new_tsc2 - new_tsc1 > 2 * apic_delay_tsc) {
		retries2++;
		goto retry2;
	}
	if (end == 0)
		return 0;

	count = start - end;

	/* Make sure the lapic can count for up to 2s */
	a = (unsigned)APIC_TIMER_MAX_COUNT;
	if (us < 2000000 && (u_int64_t)count * 2000000 >= a * us)
		return 0;

	if (lapic_calibrate_test > 0 && (retries1 > 0 || retries2 > 0)) {
		kprintf("%s: retries1=%d retries2=%d\n",
		    __func__, retries1, retries2);
	}

	diff = (new_tsc1 - old_tsc1) + (new_tsc2 - old_tsc2);
	/* XXX First estimate if the total TSC diff value makes sense */
	/* This will almost overflow, but only almost :) */
	count = (2 * count * tsc_frequency) / diff;

	return count;
}

/*
 * Measure the LAPIC timer frequency (in Hz) against sys_cputimer over a
 * DELAY() of 'us' microseconds.  Returns 0 when the LAPIC timer wrapped
 * or sys_cputimer did not advance.
 */
static uint64_t
do_cputimer_calibration(u_int us)
{
	sysclock_t value;
	sysclock_t start, end, beginning, finish;

	lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
	beginning = lapic->ccr_timer;
	start = sys_cputimer->count();
	DELAY(us);
	end = sys_cputimer->count();
	finish = lapic->ccr_timer;
	if (finish == 0)
		return 0;
	/* value is the LAPIC timer difference. */
	value = beginning - finish;
	/* end is the sys_cputimer difference. */
	end -= start;
	if (end == 0)
		return 0;
	value = ((uint64_t)value * sys_cputimer->freq) / end;
	return value;
}

/*
 * Determine lapic_cputimer_intr.freq.  In TSC-deadline mode only a shift
 * is computed so the TSC frequency fits a sysclock_t; otherwise the timer
 * is calibrated against the TSC (fast path) or sys_cputimer, walking the
 * divisor table until a divisor yields a usable measurement.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;
	u_int64_t apic_delay_tsc = 0;
	int use_tsc_calibration = 0;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/*
	 * On real hardware, tsc_invariant == 0 wouldn't be an issue, but in
	 * a virtual machine the frequency may get changed by the host.
	 */
	if (tsc_frequency != 0 && tsc_invariant && lapic_calibrate_fast)
		use_tsc_calibration = 1;

	if (use_tsc_calibration) {
		u_int64_t min_apic_tsc = 0, max_apic_tsc = 0;
		u_int64_t old_tsc, new_tsc;
		sysclock_t val;
		int i;

		/* warm up */
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		for (i = 0; i < 10; i++)
			val = lapic->ccr_timer;

		/* Sample the TSC cost of a ccr_timer read 100 times. */
		for (i = 0; i < 100; i++) {
			old_tsc = rdtsc_ordered();
			val = lapic->ccr_timer;
			new_tsc = rdtsc_ordered();
			new_tsc -= old_tsc;
			apic_delay_tsc += new_tsc;
			if (min_apic_tsc == 0 ||
			    min_apic_tsc > new_tsc) {
				min_apic_tsc = new_tsc;
			}
			if (max_apic_tsc < new_tsc)
				max_apic_tsc = new_tsc;
		}
		apic_delay_tsc /= 100;
		kprintf(
		    "LAPIC latency (in TSC ticks): %lu min: %lu max: %lu\n",
		    apic_delay_tsc, min_apic_tsc, max_apic_tsc);
		apic_delay_tsc = min_apic_tsc;
	}

	if (!use_tsc_calibration) {
		int i;

		/*
		 * Do some exercising of the lapic timer access. This improves
		 * precision of the subsequent calibration run in at least some
		 * virtualization cases.
		 */
		lapic_timer_set_divisor(0);
		for (i = 0; i < 10; i++)
			(void)do_cputimer_calibration(100);
	}
	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		if (use_tsc_calibration) {
			value = do_tsc_calibration(200*1000, apic_delay_tsc);
		} else {
			value = do_cputimer_calibration(2*1000*1000);
		}
		if (value != 0)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	lapic_cputimer_intr.freq = value;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);

	if (lapic_calibrate_test > 0) {
		uint64_t freq;
		int i;

		/* Cross-check: re-measure at 100ms..2s and print results. */
		for (i = 1; i <= 20; i++) {
			if (use_tsc_calibration) {
				freq = do_tsc_calibration(i*100*1000,
				    apic_delay_tsc);
			} else {
				freq = do_cputimer_calibration(i*100*1000);
			}
			if (freq != 0)
				kprintf("%ums: %lu\n", i * 100, freq);
		}
	}
}

/*
 * cputimer_intr reload handler, TSC-deadline mode.  Converts the
 * sysclock_t reload count into TSC ticks and only rewrites the deadline
 * MSR when the new deadline is earlier than (or replaces a stale) one.
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	/* Clamp the reload count; 1s of sysclock is plenty. */
	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	/*
	 * Fence before rdtsc().  NOTE(review): lfence suffices to order
	 * rdtsc on Intel; mfence is used on other vendors (AMD).
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;

		/* Rearm only if no/stale deadline or the new one is sooner. */
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

/*
 * cputimer_intr reload handler, one-shot count-down mode.  Only rearms
 * when the timer is not running or the new count expires sooner than the
 * current count.
 */
static void
lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < lapic->ccr_timer)
			lapic_timer_oneshot_quick(reload);
	} else {
		gd->gd_timer_running = 1;
		lapic_timer_oneshot_quick(reload);
	}
}

/*
 * Unmask the LAPIC timer interrupt and select one-shot or TSC-deadline
 * mode, then apply the AMD C1E fixup if needed.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = lapic->lvt_timer;
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	lapic->lvt_timer = timer;
	/* Serialize the LVT mode change before any deadline MSR write. */
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Detect and disable AMD C1E, which stops the LAPIC timer.  If 'arg' is
 * non-NULL it points to an int that is set to 1 when the timer had to be
 * kick-started as part of the fixup (see lapic_timer_restart_handler()).
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family. This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu IPI handler: apply the C1E fixup and, if that did not already
 * kick-start the timer, rearm it with a minimal count.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
			    lapic_timer_fixup_handler, NULL);
}

/* Restart the LAPIC timer on every active cpu via IPI. */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf("     lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 */
int
apic_ipi(int dest_type, int vector, int delivery_mode)
{
	uint32_t icr_hi;
	uint32_t icr_lo;
	int64_t tsc;
	int loops = 1;

	/* Wait for any prior IPI to finish delivery, ~1s timeout per loop. */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
		 APIC_LEVEL_ASSERT | delivery_mode | vector;
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;

	return 0;
}

/*
 * Send an IPI to a single target cpu.
 *
 * Interrupts must be hard-disabled by caller
 */
void
single_apic_ipi(int cpu, int vector, int delivery_mode)
{
	uint32_t  icr_lo;
	uint32_t  icr_hi;
	int64_t tsc;
	int loops = 1;

	/* Wait for any prior IPI to finish delivery, ~1s timeout per loop. */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("single_apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
		 APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;
}

#if 0

/*
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	u_long  icr_lo;
	u_long  icr_hi;
	unsigned long rflags;

	rflags = read_rflags();
	cpu_disable_intr();
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		write_rflags(rflags);
		return(0);
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
	lapic->icr_hi = icr_hi;

	/* build IRC_LOW */
	icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) |
		 APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_lo = icr_lo;
	write_rflags(rflags);

	return(1);
}

#endif

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in uSeconds.
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}


/*
 * Read remaining time in timer, in microseconds (rounded up).
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);	/* round up */
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = lapic->ccr_timer;
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);	/* round up */
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		/* spin */ ;
}

/*
 * Find the first APIC ID >= 'start' that is not yet assigned to a cpu;
 * returns NAPICID if none is free.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/* Map the LAPIC register page uncached and set the global 'lapic' pointer. */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe the registered LAPIC enumerators (highest priority first) and
 * enumerate cpus, then clamp 'naps' to the hw.ap_max tunable.  Returns
 * ENXIO when no enumerator probes or enumeration fails, 0 on success.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Register a LAPIC enumerator, keeping the list sorted by descending
 * lapic_prio.
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/* Record the bidirectional cpu id <-> APIC id mapping for one cpu. */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * When no I/O APIC is available, unmask LINT0 (ExtINT from the 8259)
 * and mask LINT1 on the BSP.
 */
void
lapic_fixup_noioapic(void)
{
	u_int   temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = lapic->lvt_lint0;
	temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint0 = temp;

	temp = lapic->lvt_lint1;
	temp |= APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;
}

/*
 * Boot-time hook: configure the LAPIC and init the BSP's local APIC;
 * on failure fall back to pure-ICU (8259) operation.
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);