/*
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/i386/mpapic.c,v 1.37.2.7 2003/01/25 02:31:47 peter Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/machintr.h>
#include <machine/globaldata.h>
#include <machine/clock.h>
#include <machine/limits.h>
#include <machine/smp.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine_base/apic/lapic.h>
#include <machine_base/apic/ioapic.h>
#include <machine_base/apic/ioapic_abi.h>
#include <machine_base/apic/apicvar.h>
#include <machine_base/icu/icu_var.h>
#include <machine/segments.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>

#include <machine/cputypes.h>
#include <machine/intr_machdep.h>

/* Defined elsewhere; lapic_config() clamps it to the hw.ap_max tunable. */
extern int naps;

/* Local APIC register window; assigned by lapic_map(). */
volatile lapic_t *lapic;

static void	lapic_timer_calibrate(void);
static void	lapic_timer_set_divisor(int);
static void	lapic_timer_fixup_handler(void *);
static void	lapic_timer_restart_handler(void *);


/* Non-zero: lapic_init() registers and selects the LAPIC interrupt timer. */
static int	lapic_timer_enable = 1;
TUNABLE_INT("hw.lapic_timer_enable", &lapic_timer_enable);

/* Non-zero: allow TSC Deadline mode when the cpu qualifies (see lapic_init). */
static int	lapic_timer_tscdeadline = 1;
TUNABLE_INT("hw.lapic_timer_tscdeadline", &lapic_timer_tscdeadline);

static void	lapic_timer_tscdlt_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_reload(struct cputimer_intr *, sysclock_t);
static void	lapic_timer_intr_enable(struct cputimer_intr *);
static void	lapic_timer_intr_restart(struct cputimer_intr *);
static void	lapic_timer_intr_pmfixup(struct cputimer_intr *);

/*
 * Interrupt cputimer descriptor for the LAPIC timer.
 *
 * .freq is filled in by lapic_timer_calibrate().  lapic_init() may
 * replace .reload with lapic_timer_tscdlt_reload and may OR
 * CPUTIMER_INTR_CAP_PS into .caps before registering the descriptor.
 */
static struct cputimer_intr lapic_cputimer_intr = {
	.freq = 0,
	.reload = lapic_timer_intr_reload,
	.enable = lapic_timer_intr_enable,
	.config = cputimer_intr_default_config,
	.restart = lapic_timer_intr_restart,
	.pmfixup = lapic_timer_intr_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "lapic",
	.type = CPUTIMER_INTR_LAPIC,
	.prio = CPUTIMER_INTR_PRIO_LAPIC,
	.caps = CPUTIMER_INTR_CAP_NONE,
	.priv = NULL
};

/*
 * Index into lapic_timer_divisors[] chosen by lapic_timer_calibrate();
 * stays -1 until calibration has picked one (it is not touched when
 * TSC Deadline mode is used).  The table order is also the order in
 * which calibration tries the divisors.
 */
static int		lapic_timer_divisor_idx = -1;
static const uint32_t	lapic_timer_divisors[] = {
	APIC_TDCR_2,	APIC_TDCR_4,	APIC_TDCR_8,	APIC_TDCR_16,
	APIC_TDCR_32,	APIC_TDCR_64,	APIC_TDCR_128,	APIC_TDCR_1
};
#define APIC_TIMER_NDIVISORS (int)(NELEM(lapic_timer_divisors))

/* Set to 1 by lapic_init() on the BSP when TSC Deadline mode is selected. */
static int	lapic_use_tscdeadline = 0;
/* The raw TSC frequency might not fit into a sysclock_t value. */
static int	lapic_timer_tscfreq_shift;

/*
 * APIC ID <-> CPU ID mapping structures.
 */
int	cpu_id_to_apic_id[NAPICID];
int	apic_id_to_cpu_id[NAPICID];
int	lapic_enable = 1;

/*
 * Separate cachelines for each cpu's info.
 *
 * Each entry is 8 x uint64_t (64 bytes).  'timestamp' is the last value
 * written to MSR_TSC_DEADLINE; 'downcount_time' is the TSC value at
 * which the software downcount started by set_apic_timer() expires.
 */
struct deadlines {
	uint64_t timestamp;
	uint64_t downcount_time;
	uint64_t padding[6];
};
struct deadlines *tsc_deadlines = NULL;

/*
 * Enable LAPIC, configure interrupts.
 *
 * 'bsp' is TRUE when running on the boot processor (lapic_sysinit()
 * passes TRUE).  The bsp-only branches decide whether to use TSC
 * Deadline mode, install the shared IDT vectors, calibrate the timer
 * and register the interrupt cputimer.  Other cpus only program their
 * own LAPIC registers and, when TSC Deadline mode is not used, load
 * the timer divisor picked during calibration.
 */
void
lapic_init(boolean_t bsp)
{
	uint32_t timer;
	u_int   temp;

	if (bsp) {
		/* Decide whether we want to use TSC Deadline mode. */
		if (lapic_timer_tscdeadline != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) &&
		    tsc_invariant && tsc_frequency != 0) {
			lapic_use_tscdeadline = 1;
			/*
			 * naps + 1 entries -- presumably one per AP plus
			 * the BSP; the array is indexed by mycpuid.
			 */
			tsc_deadlines = kmalloc_cachealign(
			    sizeof(struct deadlines) * (naps + 1),
			    M_DEVBUF, M_WAITOK | M_ZERO);
		}
	}

	/*
	 * Install vectors
	 *
	 * Since IDT is shared between BSP and APs, these vectors
	 * only need to be installed once; we do it on BSP.
	 */
	if (bsp) {
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x0f &&
		    CPUID_TO_FAMILY(cpu_id) < 0x17) {	/* XXX */
			uint32_t tcr;

			/*
			 * Set the LINTEN bit in the HyperTransport
			 * Transaction Control Register.
			 *
			 * This will cause EXTINT and NMI interrupts
			 * routed over the hypertransport bus to be
			 * fed into the LAPIC LINT0/LINT1.  If the bit
			 * isn't set, the interrupts will go to the
			 * general cpu INTR/NMI pins.  On a dual-core
			 * cpu the interrupt winds up going to BOTH cpus.
			 * The first cpu that does the interrupt ack
			 * cycle will get the correct interrupt.  The
			 * second cpu that does it will get a spurious
			 * interrupt vector (typically IRQ 7).
			 */
			outl(0x0cf8,
			    (1 << 31) |	/* enable */
			    (0 << 16) |	/* bus */
			    (0x18 << 11) | /* dev (cpu + 0x18) */
			    (0 << 8) |	/* func */
			    0x68	/* reg */
			    );
			tcr = inl(0xcfc);
			/* 0x00010000 is the LINTEN bit described above. */
			if ((tcr & 0x00010000) == 0) {
				kprintf("LAPIC: AMD LINTEN on\n");
				outl(0xcfc, tcr|0x00010000);
			}
			/* Clear the config-address port again. */
			outl(0x0cf8, 0);
		}

		/* Install a 'Spurious INTerrupt' vector */
		setidt_global(XSPURIOUSINT_OFFSET, Xspuriousint,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install a timer vector */
		setidt_global(XTIMER_OFFSET, Xtimer,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for TLB invalidation */
		setidt_global(XINVLTLB_OFFSET, Xinvltlb,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for IPIQ messaging */
		setidt_global(XIPIQ_OFFSET, Xipiq,
		    SDT_SYSIGT, SEL_KPL, 0);

		/* Install an inter-CPU IPI for CPU stop/restart */
		setidt_global(XCPUSTOP_OFFSET, Xcpustop,
		    SDT_SYSIGT, SEL_KPL, 0);

		/*
		 * Install the Xsniff inter-CPU IPI vector.
		 *
		 * NOTE(review): this comment used to be a copy of the
		 * XINVLTLB one ("TLB invalidation").  What Xsniff does is
		 * not visible in this file -- confirm against its handler.
		 */
		setidt_global(XSNIFF_OFFSET, Xsniff,
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/*
	 * Setup LINT0 as ExtINT on the BSP.  This is theoretically an
	 * aggregate interrupt input from the 8259.  The INTA cycle
	 * will be routed to the external controller (the 8259) which
	 * is expected to supply the vector.
	 *
	 * Must be setup edge triggered, active high.
	 *
	 * Disable LINT0 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT0 on the APs.  It doesn't matter what delivery
	 * mode we use because we leave it masked.
	 */
	temp = lapic->lvt_lint0;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	if (bsp) {
		temp |= APIC_LVT_DM_EXTINT;
		if (ioapic_enable)
			temp |= APIC_LVT_MASKED;
	} else {
		temp |= APIC_LVT_DM_FIXED | APIC_LVT_MASKED;
	}
	lapic->lvt_lint0 = temp;

	/*
	 * Setup LINT1 as NMI.
	 *
	 * Must be setup edge trigger, active high.
	 *
	 * Enable LINT1 on BSP, if I/O APIC is enabled.
	 *
	 * Disable LINT1 on the APs.
	 *
	 * (LINT1 also stays masked on the BSP when the I/O APIC is
	 * not enabled: the mask bit is only cleared for
	 * bsp && ioapic_enable.)
	 */
	temp = lapic->lvt_lint1;
	temp &= ~(APIC_LVT_MASKED | APIC_LVT_TRIG_MASK |
		  APIC_LVT_POLARITY_MASK | APIC_LVT_DM_MASK);
	temp |= APIC_LVT_MASKED | APIC_LVT_DM_NMI;
	if (bsp && ioapic_enable)
		temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;

	/*
	 * Mask the LAPIC error interrupt, LAPIC performance counter
	 * interrupt.
	 */
	lapic->lvt_error = lapic->lvt_error | APIC_LVT_MASKED;
	lapic->lvt_pcint = lapic->lvt_pcint | APIC_LVT_MASKED;

	/*
	 * Set LAPIC timer vector and mask the LAPIC timer interrupt.
	 * It is unmasked later by lapic_timer_intr_enable().
	 */
	timer = lapic->lvt_timer;
	timer &= ~APIC_LVTT_VECTOR;
	timer |= XTIMER_OFFSET;
	timer |= APIC_LVTT_MASKED;
	lapic->lvt_timer = timer;

	/*
	 * Set the Task Priority Register as needed.   At the moment allow
	 * interrupts on all cpus (the APs will remain CLId until they are
	 * ready to deal).
	 */
	temp = lapic->tpr;
	temp &= ~APIC_TPR_PRIO;		/* clear priority field */
	lapic->tpr = temp;

	/*
	 * AMD specific setup
	 *
	 * Every extended LVT entry reported by the hardware (clamped to
	 * the size of lapic->ext_lvt[]) is rewritten as masked, fixed
	 * delivery mode.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (lapic->version & APIC_VER_AMD_EXT_SPACE)) {
		uint32_t ext_feat;
		uint32_t count;
		uint32_t max_count;
		uint32_t lvt;
		uint32_t i;

		ext_feat = lapic->ext_feat;
		count = (ext_feat & APIC_EXTFEAT_MASK) >> APIC_EXTFEAT_SHIFT;
		max_count = sizeof(lapic->ext_lvt) / sizeof(lapic->ext_lvt[0]);
		if (count > max_count)
			count = max_count;
		for (i = 0; i < count; ++i) {
			lvt = lapic->ext_lvt[i].lvt;

			lvt &= ~(APIC_LVT_POLARITY_MASK | APIC_LVT_TRIG_MASK |
				 APIC_LVT_DM_MASK | APIC_LVT_MASKED);
			lvt |= APIC_LVT_MASKED | APIC_LVT_DM_FIXED;

			/*
			 * No per-entry handling yet: every case is
			 * currently a no-op.
			 */
			switch(i) {
			case APIC_EXTLVT_IBS:
				break;
			case APIC_EXTLVT_MCA:
				break;
			case APIC_EXTLVT_DEI:
				break;
			case APIC_EXTLVT_SBI:
				break;
			default:
				break;
			}
			/* Only the BSP reports the old (and new) value. */
			if (bsp) {
				kprintf(" LAPIC AMD elvt%d: 0x%08x",
					i, lapic->ext_lvt[i].lvt);
				if (lapic->ext_lvt[i].lvt != lvt)
					kprintf(" -> 0x%08x", lvt);
				kprintf("\n");
			}
			lapic->ext_lvt[i].lvt = lvt;
		}
	}

	/*
	 * Enable the LAPIC
	 */
	temp = lapic->svr;
	temp |= APIC_SVR_ENABLE;	/* enable the LAPIC */
	temp &= ~APIC_SVR_FOCUS_DISABLE; /* enable lopri focus processor */

	/*
	 * If the version register advertises APIC_VER_EOI_SUPP and the
	 * SVR currently has APIC_SVR_EOI_SUPP set, clear it.
	 */
	if (lapic->version & APIC_VER_EOI_SUPP) {
		if (temp & APIC_SVR_EOI_SUPP) {
			temp &= ~APIC_SVR_EOI_SUPP;
			if (bsp)
				kprintf(" LAPIC disabling EOI supp\n");
		}
	}

	/*
	 * Set the spurious interrupt vector.  The low 4 bits of the vector
	 * must be 1111.
	 */
	if ((XSPURIOUSINT_OFFSET & 0x0F) != 0x0F)
		panic("bad XSPURIOUSINT_OFFSET: 0x%08x", XSPURIOUSINT_OFFSET);
	temp &= ~APIC_SVR_VECTOR;
	temp |= XSPURIOUSINT_OFFSET;

	lapic->svr = temp;

	/*
	 * Pump out a few EOIs to clean out interrupts that got through
	 * before we were able to set the TPR.
	 */
	lapic->eoi = 0;
	lapic->eoi = 0;
	lapic->eoi = 0;

	if (bsp) {
		/*
		 * Calibration runs even when hw.lapic_timer_enable is 0;
		 * only the registration below is conditional.
		 */
		lapic_timer_calibrate();
		if (lapic_timer_enable) {
			if (cpu_thermal_feature & CPUID_THERMAL_ARAT) {
				/*
				 * Local APIC timer will not stop
				 * in deep C-state.
				 */
				lapic_cputimer_intr.caps |=
				    CPUTIMER_INTR_CAP_PS;
			}
			if (lapic_use_tscdeadline) {
				lapic_cputimer_intr.reload =
				    lapic_timer_tscdlt_reload;
			}
			cputimer_intr_register(&lapic_cputimer_intr);
			cputimer_intr_select(&lapic_cputimer_intr, 0);
		}
	} else if (!lapic_use_tscdeadline) {
		/* Reuse the divisor index chosen by calibration. */
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
	}

	if (bootverbose)
		apic_dump("apic_initialize()");
}

/*
 * Load the timer divide configuration register from
 * lapic_timer_divisors[divisor_idx].  The index must be within the
 * table (asserted).
 */
static void
lapic_timer_set_divisor(int divisor_idx)
{
	KKASSERT(divisor_idx >= 0 && divisor_idx < APIC_TIMER_NDIVISORS);
	lapic->dcr_timer = lapic_timer_divisors[divisor_idx];
}

/*
 * Clear the periodic and TSC-deadline mode bits in the timer LVT and
 * then write 'count' to the initial count register.  The mask bit of
 * the LVT is left as it was.
 */
static void
lapic_timer_oneshot(u_int count)
{
	uint32_t value;

	value = lapic->lvt_timer;
	value &= ~(APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}

/*
 * Write 'count' to the initial count register without touching the
 * timer LVT.
 */
static void
lapic_timer_oneshot_quick(u_int count)
{
	lapic->icr_timer = count;
}

/*
 * Write a deadline of 'diff' TSC ticks from now to MSR_TSC_DEADLINE
 * and record it in this cpu's tsc_deadlines[] entry.
 */
static void
lapic_timer_tscdeadline_quick(uint64_t diff)
{
	uint64_t val = rdtsc() + diff;

	wrmsr(MSR_TSC_DEADLINE, val);
	tsc_deadlines[mycpuid].timestamp = val;
}

/*
 * Return value * tsc_frequency / scale, rounded up, computed in
 * 64 bits.  E.g. scale == 1000000 converts microseconds to TSC ticks
 * (see set_apic_timer()).
 */
static uint64_t
lapic_scale_to_tsc(unsigned value, unsigned scale)
{
	uint64_t val;

	val = value;
	val *= tsc_frequency;
	val += (scale - 1);	/* round up */
	val /= scale;
	return val;
}

/*
 * Determine lapic_cputimer_intr.freq.
 *
 * In TSC Deadline mode the frequency is tsc_frequency shifted right
 * until it fits into INT_MAX.  Otherwise each divisor is tried in
 * table order until the counter does not reach zero during the delay,
 * and the frequency is derived from the number of ticks consumed.
 */
static void
lapic_timer_calibrate(void)
{
	sysclock_t value;

	/* No need to calibrate lapic_timer, if we will use TSC Deadline mode */
	if (lapic_use_tscdeadline) {
		lapic_timer_tscfreq_shift = 0;
		while ((tsc_frequency >> lapic_timer_tscfreq_shift) > INT_MAX)
			lapic_timer_tscfreq_shift++;
		lapic_cputimer_intr.freq =
		    tsc_frequency >> lapic_timer_tscfreq_shift;
		kprintf(
		    "lapic: TSC Deadline Mode: shift %d, frequency %u Hz\n",
		    lapic_timer_tscfreq_shift, lapic_cputimer_intr.freq);
		return;
	}

	/* Try to calibrate the local APIC timer. */
	for (lapic_timer_divisor_idx = 0;
	     lapic_timer_divisor_idx < APIC_TIMER_NDIVISORS;
	     lapic_timer_divisor_idx++) {
		lapic_timer_set_divisor(lapic_timer_divisor_idx);
		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
		/* Presumably a 2 second busy-wait (DELAY in usec) -- confirm. */
		DELAY(2000000);
		/*
		 * Ticks consumed during the delay.  This equals
		 * APIC_TIMER_MAX_COUNT only when the current count
		 * reads back as zero, in which case the next divisor
		 * is tried.
		 */
		value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
		if (value != APIC_TIMER_MAX_COUNT)
			break;
	}
	if (lapic_timer_divisor_idx >= APIC_TIMER_NDIVISORS)
		panic("lapic: no proper timer divisor?!");
	/* Halved to match the delay above (assuming it is 2 seconds). */
	lapic_cputimer_intr.freq = value / 2;

	kprintf("lapic: divisor index %d, frequency %u Hz\n",
		lapic_timer_divisor_idx, lapic_cputimer_intr.freq);
}

/*
 * cputimer_intr reload method used in TSC Deadline mode (installed by
 * lapic_init()).
 *
 * 'reload' is scaled from sys_cputimer->freq units to TSC ticks using
 * the raw tsc_frequency, after being clamped to 1000*1000*1000; the
 * resulting distance is at least 4 ticks.  If a timer is already
 * running, the deadline MSR is only rewritten when no deadline is
 * recorded, the recorded one has passed, or the new one is earlier.
 */
static void
lapic_timer_tscdlt_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;
	uint64_t diff, now, val;

	if (reload > 1000*1000*1000)
		reload = 1000*1000*1000;
	diff = (uint64_t)reload * tsc_frequency / sys_cputimer->freq;
	if (diff < 4)
		diff = 4;
	/*
	 * Fence before sampling the TSC -- presumably to keep rdtsc from
	 * executing ahead of earlier instructions; confirm the reason
	 * for the per-vendor choice.
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		cpu_lfence();
	else
		cpu_mfence();
	now = rdtsc();
	val = now + diff;
	if (gd->gd_timer_running) {
		uint64_t deadline = tsc_deadlines[mycpuid].timestamp;
		if (deadline == 0 || now > deadline || val < deadline) {
			wrmsr(MSR_TSC_DEADLINE, val);
			tsc_deadlines[mycpuid].timestamp = val;
		}
	} else {
		gd->gd_timer_running = 1;
		wrmsr(MSR_TSC_DEADLINE, val);
		tsc_deadlines[mycpuid].timestamp = val;
	}
}

/*
 * Default cputimer_intr reload method (count-down mode).
 *
 * 'reload' is scaled from sys_cputimer->freq units to cti->freq units,
 * with a minimum of 2.  If a timer is already running, the count is
 * only rewritten when the new value is smaller than the remaining
 * current count.
 */
static void
lapic_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	struct globaldata *gd = mycpu;

	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
	if (reload < 2)
		reload = 2;

	if (gd->gd_timer_running) {
		if (reload < lapic->ccr_timer)
			lapic_timer_oneshot_quick(reload);
	} else {
		gd->gd_timer_running = 1;
		lapic_timer_oneshot_quick(reload);
	}
}

/*
 * cputimer_intr enable method: unmask the timer LVT, clear the
 * periodic bit, select TSC-deadline mode if it is in use, and then
 * apply the AMD C1E fixup on the current cpu.
 */
static void
lapic_timer_intr_enable(struct cputimer_intr *cti __unused)
{
	uint32_t timer;

	timer = lapic->lvt_timer;
	timer &= ~(APIC_LVTT_MASKED | APIC_LVTT_PERIODIC | APIC_LVTT_TSCDLT);
	if (lapic_use_tscdeadline)
		timer |= APIC_LVTT_TSCDLT;
	lapic->lvt_timer = timer;
	/*
	 * NOTE(review): presumably orders the LVT write ahead of later
	 * MSR_TSC_DEADLINE writes -- confirm.
	 */
	if (lapic_use_tscdeadline)
		cpu_mfence();

	lapic_timer_fixup_handler(NULL);
}

/*
 * Apply the AMD C1E fixup on the current cpu.
 *
 * 'arg' is either NULL or a pointer to an int which is set to 1 if the
 * fixup was applied and the timer was kick-started here, 0 otherwise.
 */
static void
lapic_timer_fixup_handler(void *arg)
{
	int *started = arg;

	if (started != NULL)
		*started = 0;

	if (cpu_vendor_id == CPU_VENDOR_AMD) {
		/*
		 * Detect the presence of C1E capability mostly on latest
		 * dual-cores (or future) k8 family.  This feature renders
		 * the local APIC timer dead, so we disable it by reading
		 * the Interrupt Pending Message register and clearing both
		 * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
		 *
		 * Reference:
		 *   "BIOS and Kernel Developer's Guide for AMD NPT
		 *    Family 0Fh Processors"
		 *   #32559 revision 3.00
		 */
		if ((cpu_id & 0x00000f00) == 0x00000f00 &&
		    (cpu_id & 0x0fff0000) >= 0x00040000) {
			uint64_t msr;

			/* 0x18000000 == bits 27 and 28 named above. */
			msr = rdmsr(0xc0010055);
			if (msr & 0x18000000) {
				struct globaldata *gd = mycpu;

				kprintf("cpu%d: AMD C1E detected\n",
					gd->gd_cpuid);
				wrmsr(0xc0010055, msr & ~0x18000000ULL);

				/*
				 * We are kinda stalled;
				 * kick start again.
				 */
				gd->gd_timer_running = 1;
				if (lapic_use_tscdeadline) {
					/* Maybe reached in Virtual Machines? */
					lapic_timer_tscdeadline_quick(5000);
				} else {
					lapic_timer_oneshot_quick(2);
				}

				if (started != NULL)
					*started = 1;
			}
		}
	}
}

/*
 * Per-cpu restart handler: run the C1E fixup and, if that did not
 * already kick-start the timer, arm a very short timeout so that the
 * timer fires again soon.
 */
static void
lapic_timer_restart_handler(void *dummy __unused)
{
	int started;

	lapic_timer_fixup_handler(&started);
	if (!started) {
		struct globaldata *gd = mycpu;

		gd->gd_timer_running = 1;
		if (lapic_use_tscdeadline) {
			/* Maybe reached in Virtual Machines? */
			lapic_timer_tscdeadline_quick(5000);
		} else {
			lapic_timer_oneshot_quick(2);
		}
	}
}

/*
 * This function is called only by ACPICA code currently:
 * - AMD C1E fixup.  AMD C1E only seems to happen after ACPI
 *   module controls PM.  So once ACPICA is attached, we try
 *   to apply the fixup to prevent LAPIC timer from hanging.
 *
 * The fixup handler is sent to the cpus in smp_active_mask.
 */
static void
lapic_timer_intr_pmfixup(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask,
			    lapic_timer_fixup_handler, NULL);
}

/*
 * cputimer_intr restart method: send lapic_timer_restart_handler to
 * the cpus in smp_active_mask.
 */
static void
lapic_timer_intr_restart(struct cputimer_intr *cti __unused)
{
	lwkt_send_ipiq_mask(smp_active_mask, lapic_timer_restart_handler, NULL);
}


/*
 * dump contents of local APIC registers
 *
 * Prints the current cpu id, the caller supplied tag 'str', and the
 * LINT0, LINT1, TPR and SVR registers.
 */
void
apic_dump(char* str)
{
	kprintf("SMP: CPU%d %s:\n", mycpu->gd_cpuid, str);
	kprintf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
		lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
}

/*
 * Inter Processor Interrupt functions.
 */

/*
 * Send APIC IPI 'vector' to 'destType' via 'deliveryMode'.
 *
 *  destType is 1 of: APIC_DEST_SELF, APIC_DEST_ALLISELF, APIC_DEST_ALLESELF
 *  vector is any valid SYSTEM INT vector
 *  delivery_mode is 1 of: APIC_DELMODE_FIXED, APIC_DELMODE_LOWPRIO
 *
 * Always returns 0.  While the delivery status is busy this spins,
 * printing a stall message roughly every tsc_frequency TSC ticks and
 * panicking after 30 such intervals.
 *
 * WARNINGS!
 *
 * We now implement a per-cpu interlock (gd->gd_npoll) to prevent more than
 * one IPI from being sent to any given cpu at a time.  Thus we no longer
 * have to process incoming IPIs while waiting for the status to clear.
 * No deadlock should be possible.
 *
 * We now physically disable interrupts for the lapic ICR operation.  If
 * we do not do this then it looks like an EOI sent to the lapic (which
 * occurs even with a critical section) can interfere with the command
 * register ready status and cause an IPI to be lost.
 *
 * e.g. an interrupt can occur, issue the EOI, IRET, and cause the command
 * register to busy just before we write to icr_lo, resulting in a lost
 * issuance.  This only appears to occur on Intel cpus and is not
 * documented.  It could simply be that cpus are so fast these days that
 * it was always an issue, but is only now rearing its ugly head.  This
 * is conjecture.
 *
 * NOTE(review): interrupts are not disabled inside this function;
 * presumably the caller is expected to do so -- confirm.
 */
int
apic_ipi(int dest_type, int vector, int delivery_mode)
{
	uint32_t icr_hi;
	uint32_t icr_lo;
	int64_t tsc;
	int loops = 1;

	/* Wait for any previous IPI to leave the command register. */
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	/* Destination field is cleared: dest_type selects the targets. */
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) | dest_type |
		 APIC_LEVEL_ASSERT | delivery_mode | vector;
	/* icr_hi is written first, then icr_lo. */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;

	return 0;
}

/*
 * Send IPI 'vector' via 'delivery_mode' to the single cpu 'cpu'
 * (translated with CPUID_TO_APICID into the ICR destination field).
 * Spins, warns and eventually panics on a busy delivery status in the
 * same way as apic_ipi().
 *
 * Interrupts must be hard-disabled by caller
 */
void
single_apic_ipi(int cpu, int vector, int delivery_mode)
{
	uint32_t  icr_lo;
	uint32_t  icr_hi;
	int64_t tsc;
	int loops = 1;

	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		tsc = rdtsc();
		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
			cpu_pause();
			if ((tsc_sclock_t)(rdtsc() -
					   (tsc + tsc_frequency)) > 0) {
				kprintf("single_apic_ipi stall cpu %d (sing)\n",
					mycpuid);
				tsc = rdtsc();
				if (++loops > 30)
					panic("apic stall");
			}
		}
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_ICRLO_RESV_MASK) |
		 APIC_LEVEL_ASSERT | APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_hi = icr_hi;
	lapic->icr_lo = icr_lo;
}

#if 0

/*
 * Returns 0 if the apic is busy, 1 if we were able to queue the request.
 *
 * NOT WORKING YET!  The code as-is may end up not queueing an IPI at all
 * to the target, and the scheduler does not 'poll' for IPI messages.
 */
int
single_apic_ipi_passive(int cpu, int vector, int delivery_mode)
{
	u_long  icr_lo;
	u_long  icr_hi;
	unsigned long rflags;

	rflags = read_rflags();
	cpu_disable_intr();
	if ((lapic->icr_lo & APIC_DELSTAT_MASK) != 0) {
		write_rflags(rflags);
		return(0);
	}
	icr_hi = lapic->icr_hi & ~APIC_ID_MASK;
	icr_hi |= (CPUID_TO_APICID(cpu) << 24);
	lapic->icr_hi = icr_hi;

	/* build ICR_LOW */
	icr_lo = (lapic->icr_lo & APIC_RESV2_MASK) |
		 APIC_DEST_DESTFLD | delivery_mode | vector;

	/* write APIC ICR */
	lapic->icr_lo = icr_lo;
	write_rflags(rflags);

	return(1);
}

#endif

/*
 * Send APIC IPI 'vector' to 'target's via 'delivery_mode'.
 *
 * target is a bitmask of destination cpus.  Vector is any
 * valid system INT vector.  Delivery mode may be either
 * APIC_DELMODE_FIXED or APIC_DELMODE_LOWPRIO.
 *
 * One single_apic_ipi() is issued per cpu in the mask; 'target' is
 * passed by value, so the caller's mask is not modified.
 *
 * Interrupts must be hard-disabled by caller
 */
void
selected_apic_ipi(cpumask_t target, int vector, int delivery_mode)
{
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_apic_ipi(n, vector, delivery_mode);
	}
}

/*
 * Load a 'downcount time' in uSeconds.
 *
 * In TSC Deadline mode nothing is programmed into the hardware; the
 * expiry time is only recorded for read_apic_timer().  Otherwise the
 * LAPIC timer is loaded with the equivalent tick count (rounded up)
 * via lapic_timer_oneshot().
 */
void
set_apic_timer(int us)
{
	u_int count;

	if (lapic_use_tscdeadline) {
		uint64_t val;

		val = lapic_scale_to_tsc(us, 1000000);
		val += rdtsc();
		/* No need to arm the lapic here, just track the timeout. */
		tsc_deadlines[mycpuid].downcount_time = val;
		return;
	}

	/*
	 * When we reach here, lapic timer's frequency
	 * must have been calculated as well as the
	 * divisor (lapic->dcr_timer is setup during the
	 * divisor calculation).
	 */
	KKASSERT(lapic_cputimer_intr.freq != 0 &&
		 lapic_timer_divisor_idx >= 0);

	count = ((us * (int64_t)lapic_cputimer_intr.freq) + 999999) / 1000000;
	lapic_timer_oneshot(count);
}


/*
 * Read remaining time in timer, in microseconds (rounded up).
 *
 * Returns 0 once the timer has expired (or, in TSC Deadline mode, if
 * no downcount was ever recorded).  The result is clamped to INT_MAX.
 */
int
read_apic_timer(void)
{
	uint64_t val;

	if (lapic_use_tscdeadline) {
		uint64_t now;

		val = tsc_deadlines[mycpuid].downcount_time;
		now = rdtsc();
		if (val == 0 || now > val) {
			return 0;
		} else {
			/* Remaining TSC ticks -> microseconds, rounded up. */
			val -= now;
			val *= 1000000;
			val += (tsc_frequency - 1);
			val /= tsc_frequency;
			if (val > INT_MAX)
				val = INT_MAX;
			return val;
		}
	}

	val = lapic->ccr_timer;
	if (val == 0)
		return 0;

	KKASSERT(lapic_cputimer_intr.freq > 0);
	/* Remaining timer ticks -> microseconds, rounded up. */
	val *= 1000000;
	val += (lapic_cputimer_intr.freq - 1);
	val /= lapic_cputimer_intr.freq;
	if (val > INT_MAX)
		val = INT_MAX;
	return val;
}


/*
 * Spin-style delay, set delay time in uS, spin till it drains.
 */
void
u_sleep(int count)
{
	set_apic_timer(count);
	while (read_apic_timer())
		 /* spin */ ;
}

/*
 * Return the first APIC id in [start, APICID_MAX) that is not mapped
 * to a cpu (APICID_TO_CPUID() == -1), or NAPICID if there is none.
 */
int
lapic_unused_apic_id(int start)
{
	int i;

	for (i = start; i < APICID_MAX; ++i) {
		if (APICID_TO_CPUID(i) == -1)
			return i;
	}
	return NAPICID;
}

/*
 * Map the LAPIC registers at physical address 'lapic_addr' uncacheable
 * and point the global 'lapic' at the mapping.
 */
void
lapic_map(vm_paddr_t lapic_addr)
{
	lapic = pmap_mapdev_uncacheable(lapic_addr, sizeof(struct LAPIC));
}

/* Registered enumerators; see lapic_enumerator_register() for ordering. */
static TAILQ_HEAD(, lapic_enumerator) lapic_enumerators =
	TAILQ_HEAD_INITIALIZER(lapic_enumerators);

/*
 * Probe and enumerate the LAPICs.
 *
 * Resets the APIC id -> cpu id map, picks the first registered
 * enumerator whose probe succeeds (returns 0), runs its enumerate
 * method, and finally clamps 'naps' to the hw.ap_max tunable (itself
 * clamped to at most MAXCPU - 1).
 *
 * Returns 0 on success, ENXIO if no enumerator probes successfully or
 * if enumeration fails.
 */
int
lapic_config(void)
{
	struct lapic_enumerator *e;
	int error, i, ap_max;

	KKASSERT(lapic_enable);

	for (i = 0; i < NAPICID; ++i)
		APICID_TO_CPUID(i) = -1;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		error = e->lapic_probe(e);
		if (!error)
			break;
	}
	/* e is NULL when the loop ran off the end of the list. */
	if (e == NULL) {
		kprintf("LAPIC: Can't find LAPIC\n");
		return ENXIO;
	}

	error = e->lapic_enumerate(e);
	if (error) {
		kprintf("LAPIC: enumeration failed\n");
		return ENXIO;
	}

	ap_max = MAXCPU - 1;
	TUNABLE_INT_FETCH("hw.ap_max", &ap_max);
	if (ap_max > MAXCPU - 1)
		ap_max = MAXCPU - 1;

	if (naps > ap_max) {
		kprintf("LAPIC: Warning use only %d out of %d "
			"available APs\n",
			ap_max, naps);
		naps = ap_max;
	}

	return 0;
}

/*
 * Add 'ne' to the enumerator list.  It is inserted before the first
 * entry with a strictly lower lapic_prio, so the list stays in
 * descending priority order and entries of equal priority keep their
 * registration order.
 */
void
lapic_enumerator_register(struct lapic_enumerator *ne)
{
	struct lapic_enumerator *e;

	TAILQ_FOREACH(e, &lapic_enumerators, lapic_link) {
		if (e->lapic_prio < ne->lapic_prio) {
			TAILQ_INSERT_BEFORE(e, ne, lapic_link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(&lapic_enumerators, ne, lapic_link);
}

/*
 * Record the mapping between 'cpu_id' and 'apic_id' in both
 * directions.  No range checking is done here.
 */
void
lapic_set_cpuid(int cpu_id, int apic_id)
{
	CPUID_TO_APICID(cpu_id) = apic_id;
	APICID_TO_CPUID(apic_id) = cpu_id;
}

/*
 * Adjust the BSP's LINT pins for operation without an I/O APIC:
 * unmask LINT0 and mask LINT1.
 */
void
lapic_fixup_noioapic(void)
{
	u_int   temp;

	/* Only allowed on BSP */
	KKASSERT(mycpuid == 0);
	KKASSERT(!ioapic_enable);

	temp = lapic->lvt_lint0;
	temp &= ~APIC_LVT_MASKED;
	lapic->lvt_lint0 = temp;

	temp = lapic->lvt_lint1;
	temp |= APIC_LVT_MASKED;
	lapic->lvt_lint1 = temp;
}

/*
 * Boot-time hook: configure the LAPIC if it is enabled and, on
 * success, initialize the BSP's LAPIC.  If LAPIC configuration fails
 * (or the LAPIC is disabled) while the I/O APIC is enabled, the
 * I/O APIC is disabled as well and icu_reinit_noioapic() is called.
 */
static void
lapic_sysinit(void *dummy __unused)
{
	if (lapic_enable) {
		int error;

		error = lapic_config();
		if (error)
			lapic_enable = 0;
	}

	if (lapic_enable) {
		/* Initialize BSP's local APIC */
		lapic_init(TRUE);
	} else if (ioapic_enable) {
		ioapic_enable = 0;
		icu_reinit_noioapic();
	}
}
SYSINIT(lapic, SI_BOOT2_LAPIC, SI_ORDER_FIRST, lapic_sysinit, NULL);
