1 /* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.115.2.15 2003/03/14 21:22:35 jhb Exp $ 26 * $DragonFly: src/sys/platform/pc32/i386/mp_machdep.c,v 1.60 2008/06/07 12:03:52 mneumann Exp $ 27 */ 28 29 #include "opt_cpu.h" 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/sysctl.h> 35 #include <sys/malloc.h> 36 #include <sys/memrange.h> 37 #include <sys/cons.h> /* cngetc() */ 38 #include <sys/machintr.h> 39 40 #include <sys/mplock2.h> 41 42 #include <vm/vm.h> 43 #include <vm/vm_param.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_kern.h> 46 #include <vm/vm_extern.h> 47 #include <sys/lock.h> 48 #include <vm/vm_map.h> 49 #include <sys/user.h> 50 #ifdef GPROF 51 #include <sys/gmon.h> 52 #endif 53 54 #include <machine/smp.h> 55 #include <machine_base/apic/apicreg.h> 56 #include <machine/atomic.h> 57 #include <machine/cpufunc.h> 58 #include <machine_base/apic/mpapic.h> 59 #include <machine/psl.h> 60 #include <machine/segments.h> 61 #include <machine/tss.h> 62 #include <machine/specialreg.h> 63 #include <machine/globaldata.h> 64 #include <machine/pmap_inval.h> 65 66 #include <machine/md_var.h> /* setidt() */ 67 #include <machine_base/icu/icu.h> /* IPIs */ 68 #include <machine_base/isa/intr_machdep.h> /* IPIs */ 69 70 #define FIXUP_EXTRA_APIC_INTS 8 /* additional entries we may create */ 71 72 #define WARMBOOT_TARGET 0 73 #define WARMBOOT_OFF (KERNBASE + 0x0467) 74 #define WARMBOOT_SEG (KERNBASE + 0x0469) 75 76 #define BIOS_BASE (0xf0000) 77 #define BIOS_BASE2 (0xe0000) 78 #define BIOS_SIZE (0x10000) 79 #define BIOS_COUNT (BIOS_SIZE/4) 80 81 #define CMOS_REG (0x70) 82 #define CMOS_DATA (0x71) 83 #define BIOS_RESET (0x0f) 84 #define BIOS_WARM (0x0a) 85 86 #define PROCENTRY_FLAG_EN 0x01 87 #define PROCENTRY_FLAG_BP 0x02 88 #define IOAPICENTRY_FLAG_EN 0x01 89 90 91 /* MP Floating Pointer Structure */ 92 typedef struct MPFPS { 93 char signature[4]; 94 u_int32_t pap; 95 u_char length; 96 u_char spec_rev; 97 u_char checksum; 98 u_char mpfb1; 99 u_char mpfb2; 100 u_char mpfb3; 101 u_char mpfb4; 102 u_char mpfb5; 103 } *mpfps_t; 104 105 /* MP Configuration Table Header */ 106 typedef struct MPCTH { 107 char signature[4]; 108 u_short base_table_length; 109 u_char spec_rev; 110 u_char checksum; 111 u_char oem_id[8]; 112 u_char product_id[12]; 113 u_int32_t oem_table_pointer; 114 u_short oem_table_size; 115 u_short entry_count; 116 u_int32_t apic_address; 117 u_short extended_table_length; 118 u_char extended_table_checksum; 119 u_char reserved; 120 } *mpcth_t; 121 122 123 typedef struct PROCENTRY { 124 u_char type; 125 u_char apic_id; 126 u_char apic_version; 127 u_char cpu_flags; 128 u_int32_t cpu_signature; 129 u_int32_t feature_flags; 130 u_int32_t reserved1; 131 u_int32_t reserved2; 132 } *proc_entry_ptr; 133 134 typedef struct BUSENTRY { 135 u_char type; 136 u_char bus_id; 137 char bus_type[6]; 138 } *bus_entry_ptr; 139 140 typedef struct IOAPICENTRY { 141 u_char type; 142 u_char apic_id; 143 u_char apic_version; 144 u_char apic_flags; 145 u_int32_t apic_address; 146 } *io_apic_entry_ptr; 147 148 typedef struct INTENTRY { 149 u_char type; 150 u_char int_type; 151 u_short int_flags; 152 u_char src_bus_id; 153 u_char src_bus_irq; 154 u_char dst_apic_id; 155 u_char dst_apic_int; 156 } *int_entry_ptr; 157 158 /* descriptions of MP basetable entries */ 159 typedef struct BASETABLE_ENTRY { 160 u_char type; 161 u_char length; 162 char name[16]; 163 } basetable_entry; 164 165 struct mptable_pos { 166 mpfps_t mp_fps; 167 mpcth_t mp_cth; 168 vm_size_t mp_cth_mapsz; 169 }; 170 171 typedef int (*mptable_iter_func)(void *, const void *, int); 172 173 /* 174 * this code MUST be enabled here and in mpboot.s. 175 * it follows the very early stages of AP boot by placing values in CMOS ram. 176 * it NORMALLY will never be needed and thus the primitive method for enabling. 177 * 178 */ 179 #if defined(CHECK_POINTS) 180 #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 181 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 182 183 #define CHECK_INIT(D); \ 184 CHECK_WRITE(0x34, (D)); \ 185 CHECK_WRITE(0x35, (D)); \ 186 CHECK_WRITE(0x36, (D)); \ 187 CHECK_WRITE(0x37, (D)); \ 188 CHECK_WRITE(0x38, (D)); \ 189 CHECK_WRITE(0x39, (D)); 190 191 #define CHECK_PRINT(S); \ 192 kprintf("%s: %d, %d, %d, %d, %d, %d\n", \ 193 (S), \ 194 CHECK_READ(0x34), \ 195 CHECK_READ(0x35), \ 196 CHECK_READ(0x36), \ 197 CHECK_READ(0x37), \ 198 CHECK_READ(0x38), \ 199 CHECK_READ(0x39)); 200 201 #else /* CHECK_POINTS */ 202 203 #define CHECK_INIT(D) 204 #define CHECK_PRINT(S) 205 206 #endif /* CHECK_POINTS */ 207 208 /* 209 * Values to send to the POST hardware. 210 */ 211 #define MP_BOOTADDRESS_POST 0x10 212 #define MP_PROBE_POST 0x11 213 #define MPTABLE_PASS1_POST 0x12 214 215 #define MP_START_POST 0x13 216 #define MP_ENABLE_POST 0x14 217 #define MPTABLE_PASS2_POST 0x15 218 219 #define START_ALL_APS_POST 0x16 220 #define INSTALL_AP_TRAMP_POST 0x17 221 #define START_AP_POST 0x18 222 223 #define MP_ANNOUNCE_POST 0x19 224 225 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ 226 int current_postcode; 227 228 /** XXX FIXME: what system files declare these??? */ 229 extern struct region_descriptor r_gdt, r_idt; 230 231 int mp_naps; /* # of Applications processors */ 232 #ifdef SMP /* APIC-IO */ 233 static int mp_nbusses; /* # of busses */ 234 int mp_napics; /* # of IO APICs */ 235 vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */ 236 u_int32_t *io_apic_versions; 237 #endif 238 extern int nkpt; 239 240 u_int32_t cpu_apic_versions[NAPICID]; /* populated during mptable scan */ 241 int64_t tsc0_offset; 242 extern int64_t tsc_offsets[]; 243 244 extern u_long ebda_addr; 245 246 #ifdef SMP /* APIC-IO */ 247 struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; 248 #endif 249 250 /* 251 * APIC ID logical/physical mapping structures. 252 * We oversize these to simplify boot-time config. 253 */ 254 int cpu_num_to_apic_id[NAPICID]; 255 #ifdef SMP /* APIC-IO */ 256 int io_num_to_apic_id[NAPICID]; 257 #endif 258 int apic_id_to_logical[NAPICID]; 259 260 /* AP uses this during bootstrap. Do not staticize. */ 261 char *bootSTK; 262 static int bootAP; 263 264 struct pcb stoppcbs[MAXCPU]; 265 266 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); 267 268 static basetable_entry basetable_entry_types[] = 269 { 270 {0, 20, "Processor"}, 271 {1, 8, "Bus"}, 272 {2, 8, "I/O APIC"}, 273 {3, 8, "I/O INT"}, 274 {4, 8, "Local INT"} 275 }; 276 277 /* 278 * Local data and functions. 279 */ 280 281 static u_int boot_address; 282 static u_int base_memory; 283 static int mp_finish; 284 285 static void mp_enable(u_int boot_addr); 286 287 static int mptable_iterate_entries(const mpcth_t, 288 mptable_iter_func, void *); 289 static int mptable_probe(void); 290 static int mptable_search(void); 291 static int mptable_check(vm_paddr_t); 292 static long mptable_search_sig(u_int32_t target, int count); 293 static int mptable_hyperthread_fixup(cpumask_t, int); 294 #ifdef SMP /* APIC-IO */ 295 static void mptable_pass1(struct mptable_pos *); 296 static void mptable_pass2(struct mptable_pos *); 297 static void mptable_default(int type); 298 static void mptable_fix(void); 299 #endif 300 static int mptable_map(struct mptable_pos *, vm_paddr_t); 301 static void mptable_unmap(struct mptable_pos *); 302 static void mptable_imcr(struct mptable_pos *); 303 304 static int mptable_lapic_probe(struct lapic_enumerator *); 305 static void mptable_lapic_enumerate(struct lapic_enumerator *); 306 static void mptable_lapic_default(void); 307 308 #ifdef SMP /* APIC-IO */ 309 static void setup_apic_irq_mapping(void); 310 static int apic_int_is_bus_type(int intr, int bus_type); 311 #endif 312 static int start_all_aps(u_int boot_addr); 313 #if 0 314 static void install_ap_tramp(u_int boot_addr); 315 #endif 316 static int start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest); 317 static int smitest(void); 318 319 static cpumask_t smp_startup_mask = 1; /* which cpus have been started */ 320 cpumask_t smp_active_mask = 1; /* which cpus are ready for IPIs etc? */ 321 SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); 322 static u_int bootMP_size; 323 324 /* 325 * Calculate usable address in base memory for AP trampoline code. 326 */ 327 u_int 328 mp_bootaddress(u_int basemem) 329 { 330 POSTCODE(MP_BOOTADDRESS_POST); 331 332 base_memory = basemem; 333 334 bootMP_size = mptramp_end - mptramp_start; 335 boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ 336 if (((basemem * 1024) - boot_address) < bootMP_size) 337 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 338 /* 3 levels of page table pages */ 339 mptramp_pagetables = boot_address - (PAGE_SIZE * 3); 340 341 return mptramp_pagetables; 342 } 343 344 345 static int 346 mptable_probe(void) 347 { 348 int mpfps_paddr; 349 350 mpfps_paddr = mptable_search(); 351 if (mptable_check(mpfps_paddr)) 352 return 0; 353 354 return mpfps_paddr; 355 } 356 357 /* 358 * Look for an Intel MP spec table (ie, SMP capable hardware). 359 */ 360 static int 361 mptable_search(void) 362 { 363 long x; 364 u_int32_t target; 365 366 POSTCODE(MP_PROBE_POST); 367 368 /* see if EBDA exists */ 369 if (ebda_addr != 0) { 370 /* search first 1K of EBDA */ 371 target = (u_int32_t)ebda_addr; 372 if ((x = mptable_search_sig(target, 1024 / 4)) > 0) 373 return x; 374 } else { 375 /* last 1K of base memory, effective 'top of base' passed in */ 376 target = (u_int32_t)(base_memory - 0x400); 377 if ((x = mptable_search_sig(target, 1024 / 4)) > 0) 378 return x; 379 } 380 381 /* search the BIOS */ 382 target = (u_int32_t)BIOS_BASE; 383 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0) 384 return x; 385 386 /* search the extended BIOS */ 387 target = (u_int32_t)BIOS_BASE2; 388 if ((x = mptable_search_sig(target, BIOS_COUNT)) > 0) 389 return x; 390 391 /* nothing found */ 392 return 0; 393 } 394 395 struct mptable_check_cbarg { 396 int cpu_count; 397 int found_bsp; 398 }; 399 400 static int 401 mptable_check_callback(void *xarg, const void *pos, int type) 402 { 403 const struct PROCENTRY *ent; 404 struct mptable_check_cbarg *arg = xarg; 405 406 if (type != 0) 407 return 0; 408 ent = pos; 409 410 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0) 411 return 0; 412 arg->cpu_count++; 413 414 if (ent->cpu_flags & PROCENTRY_FLAG_BP) { 415 if (arg->found_bsp) { 416 kprintf("more than one BSP in base MP table\n"); 417 return EINVAL; 418 } 419 arg->found_bsp = 1; 420 } 421 return 0; 422 } 423 424 static int 425 mptable_check(vm_paddr_t mpfps_paddr) 426 { 427 struct mptable_pos mpt; 428 struct mptable_check_cbarg arg; 429 mpcth_t cth; 430 int error; 431 432 if (mpfps_paddr == 0) 433 return EOPNOTSUPP; 434 435 error = mptable_map(&mpt, mpfps_paddr); 436 if (error) 437 return error; 438 439 if (mpt.mp_fps->mpfb1 != 0) 440 goto done; 441 442 error = EINVAL; 443 444 cth = mpt.mp_cth; 445 if (cth == NULL) 446 goto done; 447 if (cth->apic_address == 0) 448 goto done; 449 450 bzero(&arg, sizeof(arg)); 451 error = mptable_iterate_entries(cth, mptable_check_callback, &arg); 452 if (!error) { 453 if (arg.cpu_count == 0) { 454 kprintf("MP table contains no processor entries\n"); 455 error = EINVAL; 456 } else if (!arg.found_bsp) { 457 kprintf("MP table does not contains BSP entry\n"); 458 error = EINVAL; 459 } 460 } 461 done: 462 mptable_unmap(&mpt); 463 return error; 464 } 465 466 static int 467 mptable_iterate_entries(const mpcth_t cth, mptable_iter_func func, void *arg) 468 { 469 int count, total_size; 470 const void *position; 471 472 KKASSERT(cth->base_table_length >= sizeof(struct MPCTH)); 473 total_size = cth->base_table_length - sizeof(struct MPCTH); 474 position = (const uint8_t *)cth + sizeof(struct MPCTH); 475 count = cth->entry_count; 476 477 while (count--) { 478 int type, error; 479 480 KKASSERT(total_size >= 0); 481 if (total_size == 0) { 482 kprintf("invalid base MP table, " 483 "entry count and length mismatch\n"); 484 return EINVAL; 485 } 486 487 type = *(const uint8_t *)position; 488 switch (type) { 489 case 0: /* processor_entry */ 490 case 1: /* bus_entry */ 491 case 2: /* io_apic_entry */ 492 case 3: /* int_entry */ 493 case 4: /* int_entry */ 494 break; 495 default: 496 kprintf("unknown base MP table entry type %d\n", type); 497 return EINVAL; 498 } 499 500 if (total_size < basetable_entry_types[type].length) { 501 kprintf("invalid base MP table length, " 502 "does not contain all entries\n"); 503 return EINVAL; 504 } 505 total_size -= basetable_entry_types[type].length; 506 507 error = func(arg, position, type); 508 if (error) 509 return error; 510 511 position = (const uint8_t *)position + 512 basetable_entry_types[type].length; 513 } 514 return 0; 515 } 516 517 518 /* 519 * Startup the SMP processors. 520 */ 521 void 522 mp_start(void) 523 { 524 POSTCODE(MP_START_POST); 525 mp_enable(boot_address); 526 } 527 528 529 /* 530 * Print various information about the SMP system hardware and setup. 531 */ 532 void 533 mp_announce(void) 534 { 535 int x; 536 537 POSTCODE(MP_ANNOUNCE_POST); 538 539 kprintf("DragonFly/MP: Multiprocessor motherboard\n"); 540 kprintf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0)); 541 kprintf(", version: 0x%08x\n", cpu_apic_versions[0]); 542 for (x = 1; x <= mp_naps; ++x) { 543 kprintf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x)); 544 kprintf(", version: 0x%08x\n", cpu_apic_versions[x]); 545 } 546 547 if (apic_io_enable) { 548 for (x = 0; x < mp_napics; ++x) { 549 kprintf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x)); 550 kprintf(", version: 0x%08x", io_apic_versions[x]); 551 kprintf(", at 0x%08lx\n", io_apic_address[x]); 552 } 553 } else { 554 kprintf(" Warning: APIC I/O disabled\n"); 555 } 556 } 557 558 /* 559 * AP cpu's call this to sync up protected mode. 560 * 561 * WARNING! %gs is not set up on entry. This routine sets up %gs. 562 */ 563 void 564 init_secondary(void) 565 { 566 int gsel_tss; 567 int x, myid = bootAP; 568 u_int64_t msr, cr0; 569 struct mdglobaldata *md; 570 struct privatespace *ps; 571 572 ps = &CPU_prvspace[myid]; 573 574 gdt_segs[GPROC0_SEL].ssd_base = 575 (long) &ps->mdglobaldata.gd_common_tss; 576 ps->mdglobaldata.mi.gd_prvspace = ps; 577 578 /* We fill the 32-bit segment descriptors */ 579 for (x = 0; x < NGDT; x++) { 580 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) 581 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x]); 582 } 583 /* And now a 64-bit one */ 584 ssdtosyssd(&gdt_segs[GPROC0_SEL], 585 (struct system_segment_descriptor *)&gdt[myid * NGDT + GPROC0_SEL]); 586 587 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 588 r_gdt.rd_base = (long) &gdt[myid * NGDT]; 589 lgdt(&r_gdt); /* does magic intra-segment return */ 590 591 /* lgdt() destroys the GSBASE value, so we load GSBASE after lgdt() */ 592 wrmsr(MSR_FSBASE, 0); /* User value */ 593 wrmsr(MSR_GSBASE, (u_int64_t)ps); 594 wrmsr(MSR_KGSBASE, 0); /* XXX User value while we're in the kernel */ 595 596 lidt(&r_idt); 597 598 #if 0 599 lldt(_default_ldt); 600 mdcpu->gd_currentldt = _default_ldt; 601 #endif 602 603 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 604 gdt[myid * NGDT + GPROC0_SEL].sd_type = SDT_SYSTSS; 605 606 md = mdcpu; /* loaded through %gs:0 (mdglobaldata.mi.gd_prvspace)*/ 607 608 md->gd_common_tss.tss_rsp0 = 0; /* not used until after switch */ 609 #if 0 /* JG XXX */ 610 md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16; 611 #endif 612 md->gd_tss_gdt = &gdt[myid * NGDT + GPROC0_SEL]; 613 md->gd_common_tssd = *md->gd_tss_gdt; 614 615 /* double fault stack */ 616 md->gd_common_tss.tss_ist1 = 617 (long)&md->mi.gd_prvspace->idlestack[ 618 sizeof(md->mi.gd_prvspace->idlestack)]; 619 620 ltr(gsel_tss); 621 622 /* 623 * Set to a known state: 624 * Set by mpboot.s: CR0_PG, CR0_PE 625 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 626 */ 627 cr0 = rcr0(); 628 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 629 load_cr0(cr0); 630 631 /* Set up the fast syscall stuff */ 632 msr = rdmsr(MSR_EFER) | EFER_SCE; 633 wrmsr(MSR_EFER, msr); 634 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); 635 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); 636 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 637 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); 638 wrmsr(MSR_STAR, msr); 639 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); 640 641 pmap_set_opt(); /* PSE/4MB pages, etc */ 642 #if JGXXX 643 /* Initialize the PAT MSR. */ 644 pmap_init_pat(); 645 #endif 646 647 /* set up CPU registers and state */ 648 cpu_setregs(); 649 650 /* set up SSE/NX registers */ 651 initializecpu(); 652 653 /* set up FPU state on the AP */ 654 npxinit(__INITIAL_NPXCW__); 655 656 /* disable the APIC, just to be SURE */ 657 lapic->svr &= ~APIC_SVR_ENABLE; 658 659 /* data returned to BSP */ 660 cpu_apic_versions[0] = lapic->version; 661 } 662 663 /******************************************************************* 664 * local functions and data 665 */ 666 667 /* 668 * start the SMP system 669 */ 670 static void 671 mp_enable(u_int boot_addr) 672 { 673 int apic; 674 u_int ux; 675 vm_paddr_t mpfps_paddr; 676 struct mptable_pos mpt; 677 678 POSTCODE(MP_ENABLE_POST); 679 680 lapic_config(); 681 682 mpfps_paddr = mptable_probe(); 683 if (mpfps_paddr) { 684 mptable_map(&mpt, mpfps_paddr); 685 mptable_imcr(&mpt); 686 mptable_unmap(&mpt); 687 } 688 if (apic_io_enable) { 689 690 if (!mpfps_paddr) 691 panic("no MP table, disable APIC_IO! (set hw.apic_io_enable=0)\n"); 692 693 mptable_map(&mpt, mpfps_paddr); 694 695 /* 696 * Examine the MP table for needed info 697 */ 698 mptable_pass1(&mpt); 699 mptable_pass2(&mpt); 700 701 mptable_unmap(&mpt); 702 703 /* Post scan cleanup */ 704 mptable_fix(); 705 706 setup_apic_irq_mapping(); 707 708 /* fill the LOGICAL io_apic_versions table */ 709 for (apic = 0; apic < mp_napics; ++apic) { 710 ux = io_apic_read(apic, IOAPIC_VER); 711 io_apic_versions[apic] = ux; 712 io_apic_set_id(apic, IO_TO_ID(apic)); 713 } 714 715 /* program each IO APIC in the system */ 716 for (apic = 0; apic < mp_napics; ++apic) 717 if (io_apic_setup(apic) < 0) 718 panic("IO APIC setup failure"); 719 720 } 721 722 /* 723 * These are required for SMP operation 724 */ 725 726 /* install a 'Spurious INTerrupt' vector */ 727 setidt(XSPURIOUSINT_OFFSET, Xspuriousint, 728 SDT_SYSIGT, SEL_KPL, 0); 729 730 /* install an inter-CPU IPI for TLB invalidation */ 731 setidt(XINVLTLB_OFFSET, Xinvltlb, 732 SDT_SYSIGT, SEL_KPL, 0); 733 734 /* install an inter-CPU IPI for IPIQ messaging */ 735 setidt(XIPIQ_OFFSET, Xipiq, 736 SDT_SYSIGT, SEL_KPL, 0); 737 738 /* install a timer vector */ 739 setidt(XTIMER_OFFSET, Xtimer, 740 SDT_SYSIGT, SEL_KPL, 0); 741 742 /* install an inter-CPU IPI for CPU stop/restart */ 743 setidt(XCPUSTOP_OFFSET, Xcpustop, 744 SDT_SYSIGT, SEL_KPL, 0); 745 746 /* start each Application Processor */ 747 start_all_aps(boot_addr); 748 } 749 750 751 /* 752 * look for the MP spec signature 753 */ 754 755 /* string defined by the Intel MP Spec as identifying the MP table */ 756 #define MP_SIG 0x5f504d5f /* _MP_ */ 757 #define NEXT(X) ((X) += 4) 758 static long 759 mptable_search_sig(u_int32_t target, int count) 760 { 761 vm_size_t map_size; 762 u_int32_t *addr; 763 int x, ret; 764 765 KKASSERT(target != 0); 766 767 map_size = count * sizeof(u_int32_t); 768 addr = pmap_mapdev((vm_paddr_t)target, map_size); 769 770 ret = 0; 771 for (x = 0; x < count; NEXT(x)) { 772 if (addr[x] == MP_SIG) { 773 /* make array index a byte index */ 774 ret = target + (x * sizeof(u_int32_t)); 775 break; 776 } 777 } 778 779 pmap_unmapdev((vm_offset_t)addr, map_size); 780 return ret; 781 } 782 783 784 typedef struct BUSDATA { 785 u_char bus_id; 786 enum busTypes bus_type; 787 } bus_datum; 788 789 typedef struct INTDATA { 790 u_char int_type; 791 u_short int_flags; 792 u_char src_bus_id; 793 u_char src_bus_irq; 794 u_char dst_apic_id; 795 u_char dst_apic_int; 796 u_char int_vector; 797 } io_int, local_int; 798 799 typedef struct BUSTYPENAME { 800 u_char type; 801 char name[7]; 802 } bus_type_name; 803 804 static bus_type_name bus_type_table[] = 805 { 806 {CBUS, "CBUS"}, 807 {CBUSII, "CBUSII"}, 808 {EISA, "EISA"}, 809 {MCA, "MCA"}, 810 {UNKNOWN_BUSTYPE, "---"}, 811 {ISA, "ISA"}, 812 {MCA, "MCA"}, 813 {UNKNOWN_BUSTYPE, "---"}, 814 {UNKNOWN_BUSTYPE, "---"}, 815 {UNKNOWN_BUSTYPE, "---"}, 816 {UNKNOWN_BUSTYPE, "---"}, 817 {UNKNOWN_BUSTYPE, "---"}, 818 {PCI, "PCI"}, 819 {UNKNOWN_BUSTYPE, "---"}, 820 {UNKNOWN_BUSTYPE, "---"}, 821 {UNKNOWN_BUSTYPE, "---"}, 822 {UNKNOWN_BUSTYPE, "---"}, 823 {XPRESS, "XPRESS"}, 824 {UNKNOWN_BUSTYPE, "---"} 825 }; 826 827 /* from MP spec v1.4, table 5-1 */ 828 static int default_data[7][5] = 829 { 830 /* nbus, id0, type0, id1, type1 */ 831 {1, 0, ISA, 255, 255}, 832 {1, 0, EISA, 255, 255}, 833 {1, 0, EISA, 255, 255}, 834 {1, 0, MCA, 255, 255}, 835 {2, 0, ISA, 1, PCI}, 836 {2, 0, EISA, 1, PCI}, 837 {2, 0, MCA, 1, PCI} 838 }; 839 840 /* the bus data */ 841 static bus_datum *bus_data; 842 843 /* the IO INT data, one entry per possible APIC INTerrupt */ 844 static io_int *io_apic_ints; 845 static int nintrs; 846 847 static int processor_entry (const struct PROCENTRY *entry, int cpu); 848 static int bus_entry (const struct BUSENTRY *entry, int bus); 849 static int io_apic_entry (const struct IOAPICENTRY *entry, int apic); 850 static int int_entry (const struct INTENTRY *entry, int intr); 851 static int lookup_bus_type (char *name); 852 853 static int 854 mptable_ioapic_pass1_callback(void *xarg, const void *pos, int type) 855 { 856 const struct IOAPICENTRY *ioapic_ent; 857 858 switch (type) { 859 case 1: /* bus_entry */ 860 ++mp_nbusses; 861 break; 862 863 case 2: /* io_apic_entry */ 864 ioapic_ent = pos; 865 if (ioapic_ent->apic_flags & IOAPICENTRY_FLAG_EN) { 866 io_apic_address[mp_napics++] = 867 (vm_offset_t)ioapic_ent->apic_address; 868 } 869 break; 870 871 case 3: /* int_entry */ 872 ++nintrs; 873 break; 874 } 875 return 0; 876 } 877 878 /* 879 * 1st pass on motherboard's Intel MP specification table. 880 * 881 * determines: 882 * io_apic_address[N] 883 * mp_nbusses 884 * mp_napics 885 * nintrs 886 */ 887 static void 888 mptable_pass1(struct mptable_pos *mpt) 889 { 890 mpfps_t fps; 891 int x; 892 893 POSTCODE(MPTABLE_PASS1_POST); 894 895 fps = mpt->mp_fps; 896 KKASSERT(fps != NULL); 897 898 /* clear various tables */ 899 for (x = 0; x < NAPICID; ++x) 900 io_apic_address[x] = ~0; /* IO APIC address table */ 901 902 mp_nbusses = 0; 903 mp_napics = 0; 904 nintrs = 0; 905 906 /* check for use of 'default' configuration */ 907 if (fps->mpfb1 != 0) { 908 io_apic_address[0] = DEFAULT_IO_APIC_BASE; 909 mp_nbusses = default_data[fps->mpfb1 - 1][0]; 910 mp_napics = 1; 911 nintrs = 16; 912 } else { 913 int error; 914 915 error = mptable_iterate_entries(mpt->mp_cth, 916 mptable_ioapic_pass1_callback, NULL); 917 if (error) 918 panic("mptable_iterate_entries(ioapic_pass1) failed\n"); 919 } 920 } 921 922 struct mptable_ioapic2_cbarg { 923 int bus; 924 int apic; 925 int intr; 926 }; 927 928 static int 929 mptable_ioapic_pass2_callback(void *xarg, const void *pos, int type) 930 { 931 struct mptable_ioapic2_cbarg *arg = xarg; 932 933 switch (type) { 934 case 1: 935 if (bus_entry(pos, arg->bus)) 936 ++arg->bus; 937 break; 938 939 case 2: 940 if (io_apic_entry(pos, arg->apic)) 941 ++arg->apic; 942 break; 943 944 case 3: 945 if (int_entry(pos, arg->intr)) 946 ++arg->intr; 947 break; 948 } 949 return 0; 950 } 951 952 /* 953 * 2nd pass on motherboard's Intel MP specification table. 954 * 955 * sets: 956 * ID_TO_IO(N), phy APIC ID to log CPU/IO table 957 * IO_TO_ID(N), logical IO to APIC ID table 958 * bus_data[N] 959 * io_apic_ints[N] 960 */ 961 static void 962 mptable_pass2(struct mptable_pos *mpt) 963 { 964 struct mptable_ioapic2_cbarg arg; 965 mpfps_t fps; 966 int error, x; 967 968 POSTCODE(MPTABLE_PASS2_POST); 969 970 fps = mpt->mp_fps; 971 KKASSERT(fps != NULL); 972 973 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics, 974 M_DEVBUF, M_WAITOK); 975 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics, 976 M_DEVBUF, M_WAITOK | M_ZERO); 977 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + FIXUP_EXTRA_APIC_INTS), 978 M_DEVBUF, M_WAITOK); 979 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses, 980 M_DEVBUF, M_WAITOK); 981 982 for (x = 0; x < mp_napics; x++) 983 ioapic[x] = permanent_io_mapping(io_apic_address[x]); 984 985 /* clear various tables */ 986 for (x = 0; x < NAPICID; ++x) { 987 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */ 988 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */ 989 } 990 991 /* clear bus data table */ 992 for (x = 0; x < mp_nbusses; ++x) 993 bus_data[x].bus_id = 0xff; 994 995 /* clear IO APIC INT table */ 996 for (x = 0; x < nintrs + FIXUP_EXTRA_APIC_INTS; ++x) { 997 io_apic_ints[x].int_type = 0xff; 998 io_apic_ints[x].int_vector = 0xff; 999 } 1000 1001 /* check for use of 'default' configuration */ 1002 if (fps->mpfb1 != 0) { 1003 mptable_default(fps->mpfb1); 1004 return; 1005 } 1006 1007 bzero(&arg, sizeof(arg)); 1008 error = mptable_iterate_entries(mpt->mp_cth, 1009 mptable_ioapic_pass2_callback, &arg); 1010 if (error) 1011 panic("mptable_iterate_entries(ioapic_pass2) failed\n"); 1012 } 1013 1014 /* 1015 * Check if we should perform a hyperthreading "fix-up" to 1016 * enumerate any logical CPU's that aren't already listed 1017 * in the table. 1018 * 1019 * XXX: We assume that all of the physical CPUs in the 1020 * system have the same number of logical CPUs. 1021 * 1022 * XXX: We assume that APIC ID's are allocated such that 1023 * the APIC ID's for a physical processor are aligned 1024 * with the number of logical CPU's in the processor. 1025 */ 1026 static int 1027 mptable_hyperthread_fixup(cpumask_t id_mask, int cpu_count) 1028 { 1029 int i, id, lcpus_max, logical_cpus; 1030 1031 if ((cpu_feature & CPUID_HTT) == 0) 1032 return 0; 1033 1034 lcpus_max = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 1035 if (lcpus_max <= 1) 1036 return 0; 1037 1038 if (strcmp(cpu_vendor, "GenuineIntel") == 0) { 1039 /* 1040 * INSTRUCTION SET REFERENCE, A-M (#253666) 1041 * Page 3-181, Table 3-20 1042 * "The nearest power-of-2 integer that is not smaller 1043 * than EBX[23:16] is the number of unique initial APIC 1044 * IDs reserved for addressing different logical 1045 * processors in a physical package." 1046 */ 1047 for (i = 0; ; ++i) { 1048 if ((1 << i) >= lcpus_max) { 1049 lcpus_max = 1 << i; 1050 break; 1051 } 1052 } 1053 } 1054 1055 KKASSERT(cpu_count != 0); 1056 if (cpu_count == lcpus_max) { 1057 /* We have nothing to fix */ 1058 return 0; 1059 } else if (cpu_count == 1) { 1060 /* XXX this may be incorrect */ 1061 logical_cpus = lcpus_max; 1062 } else { 1063 int cur, prev, dist; 1064 1065 /* 1066 * Calculate the distances between two nearest 1067 * APIC IDs. If all such distances are same, 1068 * then it is the number of missing cpus that 1069 * we are going to fill later. 1070 */ 1071 dist = cur = prev = -1; 1072 for (id = 0; id < MAXCPU; ++id) { 1073 if ((id_mask & CPUMASK(id)) == 0) 1074 continue; 1075 1076 cur = id; 1077 if (prev >= 0) { 1078 int new_dist = cur - prev; 1079 1080 if (dist < 0) 1081 dist = new_dist; 1082 1083 /* 1084 * Make sure that all distances 1085 * between two nearest APIC IDs 1086 * are same. 1087 */ 1088 if (dist != new_dist) 1089 return 0; 1090 } 1091 prev = cur; 1092 } 1093 if (dist == 1) 1094 return 0; 1095 1096 /* Must be power of 2 */ 1097 if (dist & (dist - 1)) 1098 return 0; 1099 1100 /* Can't exceed CPU package capacity */ 1101 if (dist > lcpus_max) 1102 logical_cpus = lcpus_max; 1103 else 1104 logical_cpus = dist; 1105 } 1106 1107 /* 1108 * For each APIC ID of a CPU that is set in the mask, 1109 * scan the other candidate APIC ID's for this 1110 * physical processor. If any of those ID's are 1111 * already in the table, then kill the fixup. 1112 */ 1113 for (id = 0; id < MAXCPU; id++) { 1114 if ((id_mask & CPUMASK(id)) == 0) 1115 continue; 1116 /* First, make sure we are on a logical_cpus boundary. */ 1117 if (id % logical_cpus != 0) 1118 return 0; 1119 for (i = id + 1; i < id + logical_cpus; i++) 1120 if ((id_mask & CPUMASK(i)) != 0) 1121 return 0; 1122 } 1123 return logical_cpus; 1124 } 1125 1126 static int 1127 mptable_map(struct mptable_pos *mpt, vm_paddr_t mpfps_paddr) 1128 { 1129 mpfps_t fps = NULL; 1130 mpcth_t cth = NULL; 1131 vm_size_t cth_mapsz = 0; 1132 1133 bzero(mpt, sizeof(*mpt)); 1134 1135 fps = pmap_mapdev(mpfps_paddr, sizeof(*fps)); 1136 if (fps->pap != 0) { 1137 /* 1138 * Map configuration table header to get 1139 * the base table size 1140 */ 1141 cth = pmap_mapdev(fps->pap, sizeof(*cth)); 1142 cth_mapsz = cth->base_table_length; 1143 pmap_unmapdev((vm_offset_t)cth, sizeof(*cth)); 1144 1145 if (cth_mapsz < sizeof(*cth)) { 1146 kprintf("invalid base MP table length %d\n", 1147 (int)cth_mapsz); 1148 pmap_unmapdev((vm_offset_t)fps, sizeof(*fps)); 1149 return EINVAL; 1150 } 1151 1152 /* 1153 * Map the base table 1154 */ 1155 cth = pmap_mapdev(fps->pap, cth_mapsz); 1156 } 1157 1158 mpt->mp_fps = fps; 1159 mpt->mp_cth = cth; 1160 mpt->mp_cth_mapsz = cth_mapsz; 1161 1162 return 0; 1163 } 1164 1165 static void 1166 mptable_unmap(struct mptable_pos *mpt) 1167 { 1168 if (mpt->mp_cth != NULL) { 1169 pmap_unmapdev((vm_offset_t)mpt->mp_cth, mpt->mp_cth_mapsz); 1170 mpt->mp_cth = NULL; 1171 mpt->mp_cth_mapsz = 0; 1172 } 1173 if (mpt->mp_fps != NULL) { 1174 pmap_unmapdev((vm_offset_t)mpt->mp_fps, sizeof(*mpt->mp_fps)); 1175 mpt->mp_fps = NULL; 1176 } 1177 } 1178 1179 void 1180 assign_apic_irq(int apic, int intpin, int irq) 1181 { 1182 int x; 1183 1184 if (int_to_apicintpin[irq].ioapic != -1) 1185 panic("assign_apic_irq: inconsistent table"); 1186 1187 int_to_apicintpin[irq].ioapic = apic; 1188 int_to_apicintpin[irq].int_pin = intpin; 1189 int_to_apicintpin[irq].apic_address = ioapic[apic]; 1190 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin; 1191 1192 for (x = 0; x < nintrs; x++) { 1193 if ((io_apic_ints[x].int_type == 0 || 1194 io_apic_ints[x].int_type == 3) && 1195 io_apic_ints[x].int_vector == 0xff && 1196 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) && 1197 io_apic_ints[x].dst_apic_int == intpin) 1198 io_apic_ints[x].int_vector = irq; 1199 } 1200 } 1201 1202 void 1203 revoke_apic_irq(int irq) 1204 { 1205 int x; 1206 int oldapic; 1207 int oldintpin; 1208 1209 if (int_to_apicintpin[irq].ioapic == -1) 1210 panic("revoke_apic_irq: inconsistent table"); 1211 1212 oldapic = int_to_apicintpin[irq].ioapic; 1213 oldintpin = int_to_apicintpin[irq].int_pin; 1214 1215 int_to_apicintpin[irq].ioapic = -1; 1216 int_to_apicintpin[irq].int_pin = 0; 1217 int_to_apicintpin[irq].apic_address = NULL; 1218 int_to_apicintpin[irq].redirindex = 0; 1219 1220 for (x = 0; x < nintrs; x++) { 1221 if ((io_apic_ints[x].int_type == 0 || 1222 io_apic_ints[x].int_type == 3) && 1223 io_apic_ints[x].int_vector != 0xff && 1224 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) && 1225 io_apic_ints[x].dst_apic_int == oldintpin) 1226 io_apic_ints[x].int_vector = 0xff; 1227 } 1228 } 1229 1230 /* 1231 * Allocate an IRQ 1232 */ 1233 static void 1234 allocate_apic_irq(int intr) 1235 { 1236 int apic; 1237 int intpin; 1238 int irq; 1239 1240 if (io_apic_ints[intr].int_vector != 0xff) 1241 return; /* Interrupt handler already assigned */ 1242 1243 if (io_apic_ints[intr].int_type != 0 && 1244 (io_apic_ints[intr].int_type != 3 || 1245 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) && 1246 io_apic_ints[intr].dst_apic_int == 0))) 1247 return; /* Not INT or ExtInt on != (0, 0) */ 1248 1249 irq = 0; 1250 while (irq < APIC_INTMAPSIZE && 1251 int_to_apicintpin[irq].ioapic != -1) 1252 irq++; 1253 1254 if (irq >= APIC_INTMAPSIZE) 1255 return; /* No free interrupt handlers */ 1256 1257 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id); 1258 intpin = io_apic_ints[intr].dst_apic_int; 1259 1260 assign_apic_irq(apic, intpin, irq); 1261 } 1262 1263 1264 static void 1265 swap_apic_id(int apic, int oldid, int newid) 1266 { 1267 int x; 1268 int oapic; 1269 1270 1271 if (oldid == newid) 1272 return; /* Nothing to do */ 1273 1274 kprintf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n", 1275 apic, oldid, newid); 1276 1277 /* Swap physical APIC IDs in interrupt entries */ 1278 for (x = 0; x < nintrs; x++) { 1279 if (io_apic_ints[x].dst_apic_id == oldid) 1280 io_apic_ints[x].dst_apic_id = newid; 1281 else if (io_apic_ints[x].dst_apic_id == newid) 1282 io_apic_ints[x].dst_apic_id = oldid; 1283 } 1284 1285 /* Swap physical APIC IDs in IO_TO_ID mappings */ 1286 for (oapic = 0; oapic < mp_napics; oapic++) 1287 if (IO_TO_ID(oapic) == newid) 1288 break; 1289 1290 if (oapic < mp_napics) { 1291 kprintf("Changing APIC ID for IO APIC #%d from " 1292 "%d to %d in MP table\n", 1293 oapic, newid, oldid); 1294 IO_TO_ID(oapic) = oldid; 1295 } 1296 IO_TO_ID(apic) = newid; 1297 } 1298 1299 1300 static void 1301 fix_id_to_io_mapping(void) 1302 { 1303 int x; 1304 1305 for (x = 0; x < NAPICID; x++) 1306 ID_TO_IO(x) = -1; 1307 1308 for (x = 0; x <= mp_naps; x++) { 1309 if ((u_int)CPU_TO_ID(x) < NAPICID) 1310 ID_TO_IO(CPU_TO_ID(x)) = x; 1311 } 1312 1313 for (x = 0; x < mp_napics; x++) { 1314 if ((u_int)IO_TO_ID(x) < NAPICID) 1315 ID_TO_IO(IO_TO_ID(x)) = x; 1316 } 1317 } 1318 1319 1320 static int 1321 first_free_apic_id(void) 1322 { 1323 int freeid, x; 1324 1325 for (freeid = 0; freeid < NAPICID; freeid++) { 1326 for (x = 0; x <= mp_naps; x++) 1327 if (CPU_TO_ID(x) == freeid) 1328 break; 1329 if (x <= mp_naps) 1330 continue; 1331 for (x = 0; x < mp_napics; x++) 1332 if (IO_TO_ID(x) == freeid) 1333 break; 1334 if (x < mp_napics) 1335 continue; 1336 return freeid; 1337 } 1338 return freeid; 1339 } 1340 1341 1342 static int 1343 io_apic_id_acceptable(int apic, int id) 1344 { 1345 int cpu; /* Logical CPU number */ 1346 int oapic; /* Logical IO APIC number for other IO APIC */ 1347 1348 if ((u_int)id >= NAPICID) 1349 return 0; /* Out of range */ 1350 1351 for (cpu = 0; cpu <= mp_naps; cpu++) { 1352 if (CPU_TO_ID(cpu) == id) 1353 return 0; /* Conflict with CPU */ 1354 } 1355 1356 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++) { 1357 if (IO_TO_ID(oapic) == id) 1358 return 0; /* Conflict with other APIC */ 1359 } 1360 1361 return 1; /* ID is acceptable for IO APIC */ 1362 } 1363 1364 static 1365 io_int * 1366 io_apic_find_int_entry(int apic, int pin) 1367 { 1368 int x; 1369 1370 /* search each of the possible INTerrupt sources */ 1371 for (x = 0; x < nintrs; ++x) { 1372 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1373 (pin == io_apic_ints[x].dst_apic_int)) 1374 return (&io_apic_ints[x]); 1375 } 1376 return NULL; 1377 } 1378 1379 /* 1380 * parse an Intel MP specification table 1381 */ 1382 static void 1383 mptable_fix(void) 1384 { 1385 int x; 1386 int id; 1387 int apic; /* IO APIC unit number */ 1388 int freeid; /* Free physical APIC ID */ 1389 int physid; /* Current physical IO APIC ID */ 1390 io_int *io14; 1391 int bus_0 = 0; /* Stop GCC warning */ 1392 int bus_pci = 0; /* Stop GCC warning */ 1393 int num_pci_bus; 1394 1395 /* 1396 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS 1397 * did it wrong. The MP spec says that when more than 1 PCI bus 1398 * exists the BIOS must begin with bus entries for the PCI bus and use 1399 * actual PCI bus numbering. This implies that when only 1 PCI bus 1400 * exists the BIOS can choose to ignore this ordering, and indeed many 1401 * MP motherboards do ignore it. This causes a problem when the PCI 1402 * sub-system makes requests of the MP sub-system based on PCI bus 1403 * numbers. So here we look for the situation and renumber the 1404 * busses and associated INTs in an effort to "make it right". 1405 */ 1406 1407 /* find bus 0, PCI bus, count the number of PCI busses */ 1408 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) { 1409 if (bus_data[x].bus_id == 0) { 1410 bus_0 = x; 1411 } 1412 if (bus_data[x].bus_type == PCI) { 1413 ++num_pci_bus; 1414 bus_pci = x; 1415 } 1416 } 1417 /* 1418 * bus_0 == slot of bus with ID of 0 1419 * bus_pci == slot of last PCI bus encountered 1420 */ 1421 1422 /* check the 1 PCI bus case for sanity */ 1423 /* if it is number 0 all is well */ 1424 if (num_pci_bus == 1 && 1425 bus_data[bus_pci].bus_id != 0) { 1426 1427 /* mis-numbered, swap with whichever bus uses slot 0 */ 1428 1429 /* swap the bus entry types */ 1430 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type; 1431 bus_data[bus_0].bus_type = PCI; 1432 1433 /* swap each relevant INTerrupt entry */ 1434 id = bus_data[bus_pci].bus_id; 1435 for (x = 0; x < nintrs; ++x) { 1436 if (io_apic_ints[x].src_bus_id == id) { 1437 io_apic_ints[x].src_bus_id = 0; 1438 } 1439 else if (io_apic_ints[x].src_bus_id == 0) { 1440 io_apic_ints[x].src_bus_id = id; 1441 } 1442 } 1443 } 1444 1445 /* Assign IO APIC IDs. 1446 * 1447 * First try the existing ID. If a conflict is detected, try 1448 * the ID in the MP table. If a conflict is still detected, find 1449 * a free id. 1450 * 1451 * We cannot use the ID_TO_IO table before all conflicts has been 1452 * resolved and the table has been corrected. 1453 */ 1454 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */ 1455 1456 /* First try to use the value set by the BIOS */ 1457 physid = io_apic_get_id(apic); 1458 if (io_apic_id_acceptable(apic, physid)) { 1459 if (IO_TO_ID(apic) != physid) 1460 swap_apic_id(apic, IO_TO_ID(apic), physid); 1461 continue; 1462 } 1463 1464 /* Then check if the value in the MP table is acceptable */ 1465 if (io_apic_id_acceptable(apic, IO_TO_ID(apic))) 1466 continue; 1467 1468 /* Last resort, find a free APIC ID and use it */ 1469 freeid = first_free_apic_id(); 1470 if (freeid >= NAPICID) 1471 panic("No free physical APIC IDs found"); 1472 1473 if (io_apic_id_acceptable(apic, freeid)) { 1474 swap_apic_id(apic, IO_TO_ID(apic), freeid); 1475 continue; 1476 } 1477 panic("Free physical APIC ID not usable"); 1478 } 1479 fix_id_to_io_mapping(); 1480 1481 /* detect and fix broken Compaq MP table */ 1482 if (apic_int_type(0, 0) == -1) { 1483 kprintf("APIC_IO: MP table broken: 8259->APIC entry missing!\n"); 1484 io_apic_ints[nintrs].int_type = 3; /* ExtInt */ 1485 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */ 1486 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */ 1487 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0); 1488 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */ 1489 nintrs++; 1490 } else if (apic_int_type(0, 0) == 0) { 1491 kprintf("APIC_IO: MP table broken: ExtINT entry corrupt!\n"); 1492 for (x = 0; x < nintrs; ++x) 1493 if ((ID_TO_IO(io_apic_ints[x].dst_apic_id) == 0) && 1494 (io_apic_ints[x].dst_apic_int) == 0) { 1495 io_apic_ints[x].int_type = 3; 1496 io_apic_ints[x].int_vector = 0xff; 1497 break; 1498 } 1499 } 1500 1501 /* 1502 * Fix missing IRQ 15 when IRQ 14 is an ISA interrupt. IDE 1503 * controllers universally come in pairs. If IRQ 14 is specified 1504 * as an ISA interrupt, then IRQ 15 had better be too. 1505 * 1506 * [ Shuttle XPC / AMD Athlon X2 ] 1507 * The MPTable is missing an entry for IRQ 15. Note that the 1508 * ACPI table has an entry for both 14 and 15. 1509 */ 1510 if (apic_int_type(0, 14) == 0 && apic_int_type(0, 15) == -1) { 1511 kprintf("APIC_IO: MP table broken: IRQ 15 not ISA when IRQ 14 is!\n"); 1512 io14 = io_apic_find_int_entry(0, 14); 1513 io_apic_ints[nintrs] = *io14; 1514 io_apic_ints[nintrs].src_bus_irq = 15; 1515 io_apic_ints[nintrs].dst_apic_int = 15; 1516 nintrs++; 1517 } 1518 } 1519 1520 /* Assign low level interrupt handlers */ 1521 static void 1522 setup_apic_irq_mapping(void) 1523 { 1524 int x; 1525 int int_vector; 1526 1527 /* Clear array */ 1528 for (x = 0; x < APIC_INTMAPSIZE; x++) { 1529 int_to_apicintpin[x].ioapic = -1; 1530 int_to_apicintpin[x].int_pin = 0; 1531 int_to_apicintpin[x].apic_address = NULL; 1532 int_to_apicintpin[x].redirindex = 0; 1533 1534 /* Default to masked */ 1535 int_to_apicintpin[x].flags = IOAPIC_IM_FLAG_MASKED; 1536 } 1537 1538 /* First assign ISA/EISA interrupts */ 1539 for (x = 0; x < nintrs; x++) { 1540 int_vector = io_apic_ints[x].src_bus_irq; 1541 if (int_vector < APIC_INTMAPSIZE && 1542 io_apic_ints[x].int_vector == 0xff && 1543 int_to_apicintpin[int_vector].ioapic == -1 && 1544 (apic_int_is_bus_type(x, ISA) || 1545 apic_int_is_bus_type(x, EISA)) && 1546 io_apic_ints[x].int_type == 0) { 1547 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 1548 io_apic_ints[x].dst_apic_int, 1549 int_vector); 1550 } 1551 } 1552 1553 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */ 1554 for (x = 0; x < nintrs; x++) { 1555 if (io_apic_ints[x].dst_apic_int == 0 && 1556 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) && 1557 io_apic_ints[x].int_vector == 0xff && 1558 int_to_apicintpin[0].ioapic == -1 && 1559 io_apic_ints[x].int_type == 3) { 1560 assign_apic_irq(0, 0, 0); 1561 break; 1562 } 1563 } 1564 1565 /* Assign PCI interrupts */ 1566 for (x = 0; x < nintrs; ++x) { 1567 if (io_apic_ints[x].int_type == 0 && 1568 io_apic_ints[x].int_vector == 0xff && 1569 apic_int_is_bus_type(x, PCI)) 1570 allocate_apic_irq(x); 1571 } 1572 } 1573 1574 void 1575 mp_set_cpuids(int cpu_id, int apic_id) 1576 { 1577 CPU_TO_ID(cpu_id) = apic_id; 1578 ID_TO_CPU(apic_id) = cpu_id; 1579 } 1580 1581 static int 1582 processor_entry(const struct PROCENTRY *entry, int cpu) 1583 { 1584 KKASSERT(cpu > 0); 1585 1586 /* check for usability */ 1587 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN)) 1588 return 0; 1589 1590 /* check for BSP flag */ 1591 if (entry->cpu_flags & PROCENTRY_FLAG_BP) { 1592 mp_set_cpuids(0, entry->apic_id); 1593 return 0; /* its already been counted */ 1594 } 1595 1596 /* add another AP to list, if less than max number of CPUs */ 1597 else if (cpu < MAXCPU) { 1598 mp_set_cpuids(cpu, entry->apic_id); 1599 return 1; 1600 } 1601 1602 return 0; 1603 } 1604 1605 static int 1606 bus_entry(const struct BUSENTRY *entry, int bus) 1607 { 1608 int x; 1609 char c, name[8]; 1610 1611 /* encode the name into an index */ 1612 for (x = 0; x < 6; ++x) { 1613 if ((c = entry->bus_type[x]) == ' ') 1614 break; 1615 name[x] = c; 1616 } 1617 name[x] = '\0'; 1618 1619 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE) 1620 panic("unknown bus type: '%s'", name); 1621 1622 bus_data[bus].bus_id = entry->bus_id; 1623 bus_data[bus].bus_type = x; 1624 1625 return 1; 1626 } 1627 1628 static int 1629 io_apic_entry(const struct IOAPICENTRY *entry, int apic) 1630 { 1631 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) 1632 return 0; 1633 1634 IO_TO_ID(apic) = entry->apic_id; 1635 ID_TO_IO(entry->apic_id) = apic; 1636 1637 return 1; 1638 } 1639 1640 static int 1641 lookup_bus_type(char *name) 1642 { 1643 int x; 1644 1645 for (x = 0; x < MAX_BUSTYPE; ++x) 1646 if (strcmp(bus_type_table[x].name, name) == 0) 1647 return bus_type_table[x].type; 1648 1649 return UNKNOWN_BUSTYPE; 1650 } 1651 1652 static int 1653 int_entry(const struct INTENTRY *entry, int intr) 1654 { 1655 int apic; 1656 1657 io_apic_ints[intr].int_type = entry->int_type; 1658 io_apic_ints[intr].int_flags = entry->int_flags; 1659 io_apic_ints[intr].src_bus_id = entry->src_bus_id; 1660 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq; 1661 if (entry->dst_apic_id == 255) { 1662 /* This signal goes to all IO APICS. Select an IO APIC 1663 with sufficient number of interrupt pins */ 1664 for (apic = 0; apic < mp_napics; apic++) 1665 if (((io_apic_read(apic, IOAPIC_VER) & 1666 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 1667 entry->dst_apic_int) 1668 break; 1669 if (apic < mp_napics) 1670 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic); 1671 else 1672 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1673 } else 1674 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1675 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int; 1676 1677 return 1; 1678 } 1679 1680 static int 1681 apic_int_is_bus_type(int intr, int bus_type) 1682 { 1683 int bus; 1684 1685 for (bus = 0; bus < mp_nbusses; ++bus) 1686 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id) 1687 && ((int) bus_data[bus].bus_type == bus_type)) 1688 return 1; 1689 1690 return 0; 1691 } 1692 1693 /* 1694 * Given a traditional ISA INT mask, return an APIC mask. 1695 */ 1696 u_int 1697 isa_apic_mask(u_int isa_mask) 1698 { 1699 int isa_irq; 1700 int apic_pin; 1701 1702 #if defined(SKIP_IRQ15_REDIRECT) 1703 if (isa_mask == (1 << 15)) { 1704 kprintf("skipping ISA IRQ15 redirect\n"); 1705 return isa_mask; 1706 } 1707 #endif /* SKIP_IRQ15_REDIRECT */ 1708 1709 isa_irq = ffs(isa_mask); /* find its bit position */ 1710 if (isa_irq == 0) /* doesn't exist */ 1711 return 0; 1712 --isa_irq; /* make it zero based */ 1713 1714 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */ 1715 if (apic_pin == -1) 1716 return 0; 1717 1718 return (1 << apic_pin); /* convert pin# to a mask */ 1719 } 1720 1721 /* 1722 * Determine which APIC pin an ISA/EISA INT is attached to. 1723 */ 1724 #define INTTYPE(I) (io_apic_ints[(I)].int_type) 1725 #define INTPIN(I) (io_apic_ints[(I)].dst_apic_int) 1726 #define INTIRQ(I) (io_apic_ints[(I)].int_vector) 1727 #define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id)) 1728 1729 #define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq) 1730 int 1731 isa_apic_irq(int isa_irq) 1732 { 1733 int intr; 1734 1735 for (intr = 0; intr < nintrs; ++intr) { /* check each record */ 1736 if (INTTYPE(intr) == 0) { /* standard INT */ 1737 if (SRCBUSIRQ(intr) == isa_irq) { 1738 if (apic_int_is_bus_type(intr, ISA) || 1739 apic_int_is_bus_type(intr, EISA)) { 1740 if (INTIRQ(intr) == 0xff) 1741 return -1; /* unassigned */ 1742 return INTIRQ(intr); /* found */ 1743 } 1744 } 1745 } 1746 } 1747 return -1; /* NOT found */ 1748 } 1749 1750 1751 /* 1752 * Determine which APIC pin a PCI INT is attached to. 1753 */ 1754 #define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id) 1755 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f) 1756 #define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03) 1757 int 1758 pci_apic_irq(int pciBus, int pciDevice, int pciInt) 1759 { 1760 int intr; 1761 1762 --pciInt; /* zero based */ 1763 1764 for (intr = 0; intr < nintrs; ++intr) { /* check each record */ 1765 if ((INTTYPE(intr) == 0) /* standard INT */ 1766 && (SRCBUSID(intr) == pciBus) 1767 && (SRCBUSDEVICE(intr) == pciDevice) 1768 && (SRCBUSLINE(intr) == pciInt)) { /* a candidate IRQ */ 1769 if (apic_int_is_bus_type(intr, PCI)) { 1770 if (INTIRQ(intr) == 0xff) { 1771 kprintf("IOAPIC: pci_apic_irq() " 1772 "failed\n"); 1773 return -1; /* unassigned */ 1774 } 1775 return INTIRQ(intr); /* exact match */ 1776 } 1777 } 1778 } 1779 1780 return -1; /* NOT found */ 1781 } 1782 1783 int 1784 next_apic_irq(int irq) 1785 { 1786 int intr, ointr; 1787 int bus, bustype; 1788 1789 bus = 0; 1790 bustype = 0; 1791 for (intr = 0; intr < nintrs; intr++) { 1792 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0) 1793 continue; 1794 bus = SRCBUSID(intr); 1795 bustype = apic_bus_type(bus); 1796 if (bustype != ISA && 1797 bustype != EISA && 1798 bustype != PCI) 1799 continue; 1800 break; 1801 } 1802 if (intr >= nintrs) { 1803 return -1; 1804 } 1805 for (ointr = intr + 1; ointr < nintrs; ointr++) { 1806 if (INTTYPE(ointr) != 0) 1807 continue; 1808 if (bus != SRCBUSID(ointr)) 1809 continue; 1810 if (bustype == PCI) { 1811 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr)) 1812 continue; 1813 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr)) 1814 continue; 1815 } 1816 if (bustype == ISA || bustype == EISA) { 1817 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr)) 1818 continue; 1819 } 1820 if (INTPIN(intr) == INTPIN(ointr)) 1821 continue; 1822 break; 1823 } 1824 if (ointr >= nintrs) { 1825 return -1; 1826 } 1827 return INTIRQ(ointr); 1828 } 1829 #undef SRCBUSLINE 1830 #undef SRCBUSDEVICE 1831 #undef SRCBUSID 1832 #undef SRCBUSIRQ 1833 1834 #undef INTPIN 1835 #undef INTIRQ 1836 #undef INTAPIC 1837 #undef INTTYPE 1838 1839 /* 1840 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt. 1841 * 1842 * XXX FIXME: 1843 * Exactly what this means is unclear at this point. It is a solution 1844 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard 1845 * could route any of the ISA INTs to upper (>15) IRQ values. But most would 1846 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an 1847 * option. 1848 */ 1849 int 1850 undirect_isa_irq(int rirq) 1851 { 1852 #if defined(READY) 1853 if (bootverbose) 1854 kprintf("Freeing redirected ISA irq %d.\n", rirq); 1855 /** FIXME: tickle the MB redirector chip */ 1856 return /* XXX */; 1857 #else 1858 if (bootverbose) 1859 kprintf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq); 1860 return 0; 1861 #endif /* READY */ 1862 } 1863 1864 1865 /* 1866 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt 1867 */ 1868 int 1869 undirect_pci_irq(int rirq) 1870 { 1871 #if defined(READY) 1872 if (bootverbose) 1873 kprintf("Freeing redirected PCI irq %d.\n", rirq); 1874 1875 /** FIXME: tickle the MB redirector chip */ 1876 return /* XXX */; 1877 #else 1878 if (bootverbose) 1879 kprintf("Freeing (NOT implemented) redirected PCI irq %d.\n", 1880 rirq); 1881 return 0; 1882 #endif /* READY */ 1883 } 1884 1885 1886 /* 1887 * given a bus ID, return: 1888 * the bus type if found 1889 * -1 if NOT found 1890 */ 1891 int 1892 apic_bus_type(int id) 1893 { 1894 int x; 1895 1896 for (x = 0; x < mp_nbusses; ++x) 1897 if (bus_data[x].bus_id == id) 1898 return bus_data[x].bus_type; 1899 1900 return -1; 1901 } 1902 1903 /* 1904 * given a LOGICAL APIC# and pin#, return: 1905 * the associated src bus ID if found 1906 * -1 if NOT found 1907 */ 1908 int 1909 apic_src_bus_id(int apic, int pin) 1910 { 1911 int x; 1912 1913 /* search each of the possible INTerrupt sources */ 1914 for (x = 0; x < nintrs; ++x) 1915 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1916 (pin == io_apic_ints[x].dst_apic_int)) 1917 return (io_apic_ints[x].src_bus_id); 1918 1919 return -1; /* NOT found */ 1920 } 1921 1922 /* 1923 * given a LOGICAL APIC# and pin#, return: 1924 * the associated src bus IRQ if found 1925 * -1 if NOT found 1926 */ 1927 int 1928 apic_src_bus_irq(int apic, int pin) 1929 { 1930 int x; 1931 1932 for (x = 0; x < nintrs; x++) 1933 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1934 (pin == io_apic_ints[x].dst_apic_int)) 1935 return (io_apic_ints[x].src_bus_irq); 1936 1937 return -1; /* NOT found */ 1938 } 1939 1940 1941 /* 1942 * given a LOGICAL APIC# and pin#, return: 1943 * the associated INTerrupt type if found 1944 * -1 if NOT found 1945 */ 1946 int 1947 apic_int_type(int apic, int pin) 1948 { 1949 int x; 1950 1951 /* search each of the possible INTerrupt sources */ 1952 for (x = 0; x < nintrs; ++x) { 1953 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1954 (pin == io_apic_ints[x].dst_apic_int)) 1955 return (io_apic_ints[x].int_type); 1956 } 1957 return -1; /* NOT found */ 1958 } 1959 1960 /* 1961 * Return the IRQ associated with an APIC pin 1962 */ 1963 int 1964 apic_irq(int apic, int pin) 1965 { 1966 int x; 1967 int res; 1968 1969 for (x = 0; x < nintrs; ++x) { 1970 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1971 (pin == io_apic_ints[x].dst_apic_int)) { 1972 res = io_apic_ints[x].int_vector; 1973 if (res == 0xff) 1974 return -1; 1975 if (apic != int_to_apicintpin[res].ioapic) 1976 panic("apic_irq: inconsistent table %d/%d", apic, int_to_apicintpin[res].ioapic); 1977 if (pin != int_to_apicintpin[res].int_pin) 1978 panic("apic_irq inconsistent table (2)"); 1979 return res; 1980 } 1981 } 1982 return -1; 1983 } 1984 1985 1986 /* 1987 * given a LOGICAL APIC# and pin#, return: 1988 * the associated trigger mode if found 1989 * -1 if NOT found 1990 */ 1991 int 1992 apic_trigger(int apic, int pin) 1993 { 1994 int x; 1995 1996 /* search each of the possible INTerrupt sources */ 1997 for (x = 0; x < nintrs; ++x) 1998 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1999 (pin == io_apic_ints[x].dst_apic_int)) 2000 return ((io_apic_ints[x].int_flags >> 2) & 0x03); 2001 2002 return -1; /* NOT found */ 2003 } 2004 2005 2006 /* 2007 * given a LOGICAL APIC# and pin#, return: 2008 * the associated 'active' level if found 2009 * -1 if NOT found 2010 */ 2011 int 2012 apic_polarity(int apic, int pin) 2013 { 2014 int x; 2015 2016 /* search each of the possible INTerrupt sources */ 2017 for (x = 0; x < nintrs; ++x) 2018 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 2019 (pin == io_apic_ints[x].dst_apic_int)) 2020 return (io_apic_ints[x].int_flags & 0x03); 2021 2022 return -1; /* NOT found */ 2023 } 2024 2025 /* 2026 * set data according to MP defaults 2027 * FIXME: probably not complete yet... 2028 */ 2029 static void 2030 mptable_default(int type) 2031 { 2032 int io_apic_id; 2033 int pin; 2034 2035 #if 0 2036 kprintf(" MP default config type: %d\n", type); 2037 switch (type) { 2038 case 1: 2039 kprintf(" bus: ISA, APIC: 82489DX\n"); 2040 break; 2041 case 2: 2042 kprintf(" bus: EISA, APIC: 82489DX\n"); 2043 break; 2044 case 3: 2045 kprintf(" bus: EISA, APIC: 82489DX\n"); 2046 break; 2047 case 4: 2048 kprintf(" bus: MCA, APIC: 82489DX\n"); 2049 break; 2050 case 5: 2051 kprintf(" bus: ISA+PCI, APIC: Integrated\n"); 2052 break; 2053 case 6: 2054 kprintf(" bus: EISA+PCI, APIC: Integrated\n"); 2055 break; 2056 case 7: 2057 kprintf(" bus: MCA+PCI, APIC: Integrated\n"); 2058 break; 2059 default: 2060 kprintf(" future type\n"); 2061 break; 2062 /* NOTREACHED */ 2063 } 2064 #endif /* 0 */ 2065 2066 /* one and only IO APIC */ 2067 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24; 2068 2069 /* 2070 * sanity check, refer to MP spec section 3.6.6, last paragraph 2071 * necessary as some hardware isn't properly setting up the IO APIC 2072 */ 2073 #if defined(REALLY_ANAL_IOAPICID_VALUE) 2074 if (io_apic_id != 2) { 2075 #else 2076 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) { 2077 #endif /* REALLY_ANAL_IOAPICID_VALUE */ 2078 io_apic_set_id(0, 2); 2079 io_apic_id = 2; 2080 } 2081 IO_TO_ID(0) = io_apic_id; 2082 ID_TO_IO(io_apic_id) = 0; 2083 2084 /* fill out bus entries */ 2085 switch (type) { 2086 case 1: 2087 case 2: 2088 case 3: 2089 case 4: 2090 case 5: 2091 case 6: 2092 case 7: 2093 bus_data[0].bus_id = default_data[type - 1][1]; 2094 bus_data[0].bus_type = default_data[type - 1][2]; 2095 bus_data[1].bus_id = default_data[type - 1][3]; 2096 bus_data[1].bus_type = default_data[type - 1][4]; 2097 break; 2098 2099 /* case 4: case 7: MCA NOT supported */ 2100 default: /* illegal/reserved */ 2101 panic("BAD default MP config: %d", type); 2102 /* NOTREACHED */ 2103 } 2104 2105 /* general cases from MP v1.4, table 5-2 */ 2106 for (pin = 0; pin < 16; ++pin) { 2107 io_apic_ints[pin].int_type = 0; 2108 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */ 2109 io_apic_ints[pin].src_bus_id = 0; 2110 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */ 2111 io_apic_ints[pin].dst_apic_id = io_apic_id; 2112 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */ 2113 } 2114 2115 /* special cases from MP v1.4, table 5-2 */ 2116 if (type == 2) { 2117 io_apic_ints[2].int_type = 0xff; /* N/C */ 2118 io_apic_ints[13].int_type = 0xff; /* N/C */ 2119 #if !defined(APIC_MIXED_MODE) 2120 /** FIXME: ??? */ 2121 panic("sorry, can't support type 2 default yet"); 2122 #endif /* APIC_MIXED_MODE */ 2123 } 2124 else 2125 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */ 2126 2127 if (type == 7) 2128 io_apic_ints[0].int_type = 0xff; /* N/C */ 2129 else 2130 io_apic_ints[0].int_type = 3; /* vectored 8259 */ 2131 } 2132 2133 /* 2134 * Map a physical memory address representing I/O into KVA. The I/O 2135 * block is assumed not to cross a page boundary. 2136 */ 2137 void * 2138 permanent_io_mapping(vm_paddr_t pa) 2139 { 2140 KKASSERT(pa < 0x100000000LL); 2141 2142 return pmap_mapdev_uncacheable(pa, PAGE_SIZE); 2143 } 2144 2145 /* 2146 * start each AP in our list 2147 */ 2148 static int 2149 start_all_aps(u_int boot_addr) 2150 { 2151 vm_offset_t va = boot_address + KERNBASE; 2152 u_int64_t *pt4, *pt3, *pt2; 2153 int x, i, pg; 2154 int shift; 2155 int smicount; 2156 int smibest; 2157 int smilast; 2158 u_char mpbiosreason; 2159 u_long mpbioswarmvec; 2160 struct mdglobaldata *gd; 2161 struct privatespace *ps; 2162 2163 POSTCODE(START_ALL_APS_POST); 2164 2165 /* Initialize BSP's local APIC */ 2166 apic_initialize(TRUE); 2167 2168 /* install the AP 1st level boot code */ 2169 pmap_kenter(va, boot_address); 2170 cpu_invlpg((void *)va); /* JG XXX */ 2171 bcopy(mptramp_start, (void *)va, bootMP_size); 2172 2173 /* Locate the page tables, they'll be below the trampoline */ 2174 pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); 2175 pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); 2176 pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); 2177 2178 /* Create the initial 1GB replicated page tables */ 2179 for (i = 0; i < 512; i++) { 2180 /* Each slot of the level 4 pages points to the same level 3 page */ 2181 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); 2182 pt4[i] |= PG_V | PG_RW | PG_U; 2183 2184 /* Each slot of the level 3 pages points to the same level 2 page */ 2185 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); 2186 pt3[i] |= PG_V | PG_RW | PG_U; 2187 2188 /* The level 2 page slots are mapped with 2MB pages for 1GB. */ 2189 pt2[i] = i * (2 * 1024 * 1024); 2190 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; 2191 } 2192 2193 /* save the current value of the warm-start vector */ 2194 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 2195 outb(CMOS_REG, BIOS_RESET); 2196 mpbiosreason = inb(CMOS_DATA); 2197 2198 /* setup a vector to our boot code */ 2199 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 2200 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 2201 outb(CMOS_REG, BIOS_RESET); 2202 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 2203 2204 /* 2205 * If we have a TSC we can figure out the SMI interrupt rate. 2206 * The SMI does not necessarily use a constant rate. Spend 2207 * up to 250ms trying to figure it out. 2208 */ 2209 smibest = 0; 2210 if (cpu_feature & CPUID_TSC) { 2211 set_apic_timer(275000); 2212 smilast = read_apic_timer(); 2213 for (x = 0; x < 20 && read_apic_timer(); ++x) { 2214 smicount = smitest(); 2215 if (smibest == 0 || smilast - smicount < smibest) 2216 smibest = smilast - smicount; 2217 smilast = smicount; 2218 } 2219 if (smibest > 250000) 2220 smibest = 0; 2221 if (smibest) { 2222 smibest = smibest * (int64_t)1000000 / 2223 get_apic_timer_frequency(); 2224 } 2225 } 2226 if (smibest) 2227 kprintf("SMI Frequency (worst case): %d Hz (%d us)\n", 2228 1000000 / smibest, smibest); 2229 2230 kprintf("SMP: Starting %d APs: ", mp_naps); 2231 /* start each AP */ 2232 for (x = 1; x <= mp_naps; ++x) { 2233 2234 /* This is a bit verbose, it will go away soon. */ 2235 2236 /* first page of AP's private space */ 2237 pg = x * x86_64_btop(sizeof(struct privatespace)); 2238 2239 /* allocate new private data page(s) */ 2240 gd = (struct mdglobaldata *)kmem_alloc(&kernel_map, 2241 MDGLOBALDATA_BASEALLOC_SIZE); 2242 2243 gd = &CPU_prvspace[x].mdglobaldata; /* official location */ 2244 bzero(gd, sizeof(*gd)); 2245 gd->mi.gd_prvspace = ps = &CPU_prvspace[x]; 2246 2247 /* prime data page for it to use */ 2248 mi_gdinit(&gd->mi, x); 2249 cpu_gdinit(gd, x); 2250 gd->mi.gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * (mp_naps + 1)); 2251 bzero(gd->mi.gd_ipiq, sizeof(lwkt_ipiq) * (mp_naps + 1)); 2252 2253 /* setup a vector to our boot code */ 2254 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 2255 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); 2256 outb(CMOS_REG, BIOS_RESET); 2257 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 2258 2259 /* 2260 * Setup the AP boot stack 2261 */ 2262 bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2]; 2263 bootAP = x; 2264 2265 /* attempt to start the Application Processor */ 2266 CHECK_INIT(99); /* setup checkpoints */ 2267 if (!start_ap(gd, boot_addr, smibest)) { 2268 kprintf("\nAP #%d (PHY# %d) failed!\n", 2269 x, CPU_TO_ID(x)); 2270 CHECK_PRINT("trace"); /* show checkpoints */ 2271 /* better panic as the AP may be running loose */ 2272 kprintf("panic y/n? [y] "); 2273 if (cngetc() != 'n') 2274 panic("bye-bye"); 2275 } 2276 CHECK_PRINT("trace"); /* show checkpoints */ 2277 2278 /* record its version info */ 2279 cpu_apic_versions[x] = cpu_apic_versions[0]; 2280 } 2281 2282 /* set ncpus to 1 + highest logical cpu. Not all may have come up */ 2283 ncpus = x; 2284 2285 /* ncpus2 -- ncpus rounded down to the nearest power of 2 */ 2286 for (shift = 0; (1 << shift) <= ncpus; ++shift) 2287 ; 2288 --shift; 2289 ncpus2_shift = shift; 2290 ncpus2 = 1 << shift; 2291 ncpus2_mask = ncpus2 - 1; 2292 2293 /* ncpus_fit -- ncpus rounded up to the nearest power of 2 */ 2294 if ((1 << shift) < ncpus) 2295 ++shift; 2296 ncpus_fit = 1 << shift; 2297 ncpus_fit_mask = ncpus_fit - 1; 2298 2299 /* build our map of 'other' CPUs */ 2300 mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); 2301 mycpu->gd_ipiq = (void *)kmem_alloc(&kernel_map, sizeof(lwkt_ipiq) * ncpus); 2302 bzero(mycpu->gd_ipiq, sizeof(lwkt_ipiq) * ncpus); 2303 2304 /* fill in our (BSP) APIC version */ 2305 cpu_apic_versions[0] = lapic->version; 2306 2307 /* restore the warmstart vector */ 2308 *(u_long *) WARMBOOT_OFF = mpbioswarmvec; 2309 outb(CMOS_REG, BIOS_RESET); 2310 outb(CMOS_DATA, mpbiosreason); 2311 2312 /* 2313 * NOTE! The idlestack for the BSP was setup by locore. Finish 2314 * up, clean out the P==V mapping we did earlier. 2315 */ 2316 pmap_set_opt(); 2317 2318 /* number of APs actually started */ 2319 return ncpus - 1; 2320 } 2321 2322 2323 /* 2324 * load the 1st level AP boot code into base memory. 2325 */ 2326 2327 /* targets for relocation */ 2328 extern void bigJump(void); 2329 extern void bootCodeSeg(void); 2330 extern void bootDataSeg(void); 2331 extern void MPentry(void); 2332 extern u_int MP_GDT; 2333 extern u_int mp_gdtbase; 2334 2335 #if 0 2336 2337 static void 2338 install_ap_tramp(u_int boot_addr) 2339 { 2340 int x; 2341 int size = *(int *) ((u_long) & bootMP_size); 2342 u_char *src = (u_char *) ((u_long) bootMP); 2343 u_char *dst = (u_char *) boot_addr + KERNBASE; 2344 u_int boot_base = (u_int) bootMP; 2345 u_int8_t *dst8; 2346 u_int16_t *dst16; 2347 u_int32_t *dst32; 2348 2349 POSTCODE(INSTALL_AP_TRAMP_POST); 2350 2351 for (x = 0; x < size; ++x) 2352 *dst++ = *src++; 2353 2354 /* 2355 * modify addresses in code we just moved to basemem. unfortunately we 2356 * need fairly detailed info about mpboot.s for this to work. changes 2357 * to mpboot.s might require changes here. 2358 */ 2359 2360 /* boot code is located in KERNEL space */ 2361 dst = (u_char *) boot_addr + KERNBASE; 2362 2363 /* modify the lgdt arg */ 2364 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 2365 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); 2366 2367 /* modify the ljmp target for MPentry() */ 2368 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 2369 *dst32 = ((u_int) MPentry - KERNBASE); 2370 2371 /* modify the target for boot code segment */ 2372 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 2373 dst8 = (u_int8_t *) (dst16 + 1); 2374 *dst16 = (u_int) boot_addr & 0xffff; 2375 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2376 2377 /* modify the target for boot data segment */ 2378 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 2379 dst8 = (u_int8_t *) (dst16 + 1); 2380 *dst16 = (u_int) boot_addr & 0xffff; 2381 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2382 } 2383 2384 #endif 2385 2386 /* 2387 * This function starts the AP (application processor) identified 2388 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 2389 * to accomplish this. This is necessary because of the nuances 2390 * of the different hardware we might encounter. It ain't pretty, 2391 * but it seems to work. 2392 * 2393 * NOTE: eventually an AP gets to ap_init(), which is called just 2394 * before the AP goes into the LWKT scheduler's idle loop. 2395 */ 2396 static int 2397 start_ap(struct mdglobaldata *gd, u_int boot_addr, int smibest) 2398 { 2399 int physical_cpu; 2400 int vector; 2401 u_long icr_lo, icr_hi; 2402 2403 POSTCODE(START_AP_POST); 2404 2405 /* get the PHYSICAL APIC ID# */ 2406 physical_cpu = CPU_TO_ID(gd->mi.gd_cpuid); 2407 2408 /* calculate the vector */ 2409 vector = (boot_addr >> 12) & 0xff; 2410 2411 /* We don't want anything interfering */ 2412 cpu_disable_intr(); 2413 2414 /* Make sure the target cpu sees everything */ 2415 wbinvd(); 2416 2417 /* 2418 * Try to detect when a SMI has occurred, wait up to 200ms. 2419 * 2420 * If a SMI occurs during an AP reset but before we issue 2421 * the STARTUP command, the AP may brick. To work around 2422 * this problem we hold off doing the AP startup until 2423 * after we have detected the SMI. Hopefully another SMI 2424 * will not occur before we finish the AP startup. 2425 * 2426 * Retries don't seem to help. SMIs have a window of opportunity 2427 * and if USB->legacy keyboard emulation is enabled in the BIOS 2428 * the interrupt rate can be quite high. 2429 * 2430 * NOTE: Don't worry about the L1 cache load, it might bloat 2431 * ldelta a little but ndelta will be so huge when the SMI 2432 * occurs the detection logic will still work fine. 2433 */ 2434 if (smibest) { 2435 set_apic_timer(200000); 2436 smitest(); 2437 } 2438 2439 /* 2440 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 2441 * and running the target CPU. OR this INIT IPI might be latched (P5 2442 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 2443 * ignored. 2444 * 2445 * see apic/apicreg.h for icr bit definitions. 2446 * 2447 * TIME CRITICAL CODE, DO NOT DO ANY KPRINTFS IN THE HOT PATH. 2448 */ 2449 2450 /* 2451 * Setup the address for the target AP. We can setup 2452 * icr_hi once and then just trigger operations with 2453 * icr_lo. 2454 */ 2455 icr_hi = lapic->icr_hi & ~APIC_ID_MASK; 2456 icr_hi |= (physical_cpu << 24); 2457 icr_lo = lapic->icr_lo & 0xfff00000; 2458 lapic->icr_hi = icr_hi; 2459 2460 /* 2461 * Do an INIT IPI: assert RESET 2462 * 2463 * Use edge triggered mode to assert INIT 2464 */ 2465 lapic->icr_lo = icr_lo | 0x00004500; 2466 while (lapic->icr_lo & APIC_DELSTAT_MASK) 2467 /* spin */ ; 2468 2469 /* 2470 * The spec calls for a 10ms delay but we may have to use a 2471 * MUCH lower delay to avoid bricking an AP due to a fast SMI 2472 * interrupt. We have other loops here too and dividing by 2 2473 * doesn't seem to be enough even after subtracting 350us, 2474 * so we divide by 4. 2475 * 2476 * Our minimum delay is 150uS, maximum is 10ms. If no SMI 2477 * interrupt was detected we use the full 10ms. 2478 */ 2479 if (smibest == 0) 2480 u_sleep(10000); 2481 else if (smibest < 150 * 4 + 350) 2482 u_sleep(150); 2483 else if ((smibest - 350) / 4 < 10000) 2484 u_sleep((smibest - 350) / 4); 2485 else 2486 u_sleep(10000); 2487 2488 /* 2489 * Do an INIT IPI: deassert RESET 2490 * 2491 * Use level triggered mode to deassert. It is unclear 2492 * why we need to do this. 2493 */ 2494 lapic->icr_lo = icr_lo | 0x00008500; 2495 while (lapic->icr_lo & APIC_DELSTAT_MASK) 2496 /* spin */ ; 2497 u_sleep(150); /* wait 150us */ 2498 2499 /* 2500 * Next we do a STARTUP IPI: the previous INIT IPI might still be 2501 * latched, (P5 bug) this 1st STARTUP would then terminate 2502 * immediately, and the previously started INIT IPI would continue. OR 2503 * the previous INIT IPI has already run. and this STARTUP IPI will 2504 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 2505 * will run. 2506 */ 2507 lapic->icr_lo = icr_lo | 0x00000600 | vector; 2508 while (lapic->icr_lo & APIC_DELSTAT_MASK) 2509 /* spin */ ; 2510 u_sleep(200); /* wait ~200uS */ 2511 2512 /* 2513 * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 2514 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 2515 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 2516 * recognized after hardware RESET or INIT IPI. 2517 */ 2518 lapic->icr_lo = icr_lo | 0x00000600 | vector; 2519 while (lapic->icr_lo & APIC_DELSTAT_MASK) 2520 /* spin */ ; 2521 2522 /* Resume normal operation */ 2523 cpu_enable_intr(); 2524 2525 /* wait for it to start, see ap_init() */ 2526 set_apic_timer(5000000);/* == 5 seconds */ 2527 while (read_apic_timer()) { 2528 if (smp_startup_mask & CPUMASK(gd->mi.gd_cpuid)) 2529 return 1; /* return SUCCESS */ 2530 } 2531 2532 return 0; /* return FAILURE */ 2533 } 2534 2535 static 2536 int 2537 smitest(void) 2538 { 2539 int64_t ltsc; 2540 int64_t ntsc; 2541 int64_t ldelta; 2542 int64_t ndelta; 2543 int count; 2544 2545 ldelta = 0; 2546 ndelta = 0; 2547 while (read_apic_timer()) { 2548 ltsc = rdtsc(); 2549 for (count = 0; count < 100; ++count) 2550 ntsc = rdtsc(); /* force loop to occur */ 2551 if (ldelta) { 2552 ndelta = ntsc - ltsc; 2553 if (ldelta > ndelta) 2554 ldelta = ndelta; 2555 if (ndelta > ldelta * 2) 2556 break; 2557 } else { 2558 ldelta = ntsc - ltsc; 2559 } 2560 } 2561 return(read_apic_timer()); 2562 } 2563 2564 /* 2565 * Synchronously flush the TLB on all other CPU's. The current cpu's 2566 * TLB is not flushed. If the caller wishes to flush the current cpu's 2567 * TLB the caller must call cpu_invltlb() in addition to smp_invltlb(). 2568 * 2569 * NOTE: If for some reason we were unable to start all cpus we cannot 2570 * safely use broadcast IPIs. 2571 */ 2572 2573 static cpumask_t smp_invltlb_req; 2574 2575 #define SMP_INVLTLB_DEBUG 2576 2577 void 2578 smp_invltlb(void) 2579 { 2580 #ifdef SMP 2581 struct mdglobaldata *md = mdcpu; 2582 #ifdef SMP_INVLTLB_DEBUG 2583 long count = 0; 2584 long xcount = 0; 2585 #endif 2586 2587 crit_enter_gd(&md->mi); 2588 md->gd_invltlb_ret = 0; 2589 ++md->mi.gd_cnt.v_smpinvltlb; 2590 atomic_set_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); 2591 #ifdef SMP_INVLTLB_DEBUG 2592 again: 2593 #endif 2594 if (smp_startup_mask == smp_active_mask) { 2595 all_but_self_ipi(XINVLTLB_OFFSET); 2596 } else { 2597 selected_apic_ipi(smp_active_mask & ~md->mi.gd_cpumask, 2598 XINVLTLB_OFFSET, APIC_DELMODE_FIXED); 2599 } 2600 2601 #ifdef SMP_INVLTLB_DEBUG 2602 if (xcount) 2603 kprintf("smp_invltlb: ipi sent\n"); 2604 #endif 2605 while ((md->gd_invltlb_ret & smp_active_mask & ~md->mi.gd_cpumask) != 2606 (smp_active_mask & ~md->mi.gd_cpumask)) { 2607 cpu_mfence(); 2608 cpu_pause(); 2609 #ifdef SMP_INVLTLB_DEBUG 2610 /* DEBUGGING */ 2611 if (++count == 400000000) { 2612 print_backtrace(-1); 2613 kprintf("smp_invltlb: endless loop %08lx %08lx, " 2614 "rflags %016jx retry", 2615 (long)md->gd_invltlb_ret, 2616 (long)smp_invltlb_req, 2617 (intmax_t)read_rflags()); 2618 __asm __volatile ("sti"); 2619 ++xcount; 2620 if (xcount > 2) 2621 lwkt_process_ipiq(); 2622 if (xcount > 3) { 2623 int bcpu = BSFCPUMASK(~md->gd_invltlb_ret & 2624 ~md->mi.gd_cpumask & 2625 smp_active_mask); 2626 globaldata_t xgd; 2627 2628 kprintf("bcpu %d\n", bcpu); 2629 xgd = globaldata_find(bcpu); 2630 kprintf("thread %p %s\n", xgd->gd_curthread, xgd->gd_curthread->td_comm); 2631 } 2632 if (xcount > 5) 2633 Debugger("giving up"); 2634 count = 0; 2635 goto again; 2636 } 2637 #endif 2638 } 2639 atomic_clear_cpumask(&smp_invltlb_req, md->mi.gd_cpumask); 2640 crit_exit_gd(&md->mi); 2641 #endif 2642 } 2643 2644 #ifdef SMP 2645 2646 /* 2647 * Called from Xinvltlb assembly with interrupts disabled. We didn't 2648 * bother to bump the critical section count or nested interrupt count 2649 * so only do very low level operations here. 2650 */ 2651 void 2652 smp_invltlb_intr(void) 2653 { 2654 struct mdglobaldata *md = mdcpu; 2655 struct mdglobaldata *omd; 2656 cpumask_t mask; 2657 int cpu; 2658 2659 cpu_mfence(); 2660 mask = smp_invltlb_req; 2661 cpu_invltlb(); 2662 while (mask) { 2663 cpu = BSFCPUMASK(mask); 2664 mask &= ~CPUMASK(cpu); 2665 omd = (struct mdglobaldata *)globaldata_find(cpu); 2666 atomic_set_cpumask(&omd->gd_invltlb_ret, md->mi.gd_cpumask); 2667 } 2668 } 2669 2670 #endif 2671 2672 /* 2673 * When called the executing CPU will send an IPI to all other CPUs 2674 * requesting that they halt execution. 2675 * 2676 * Usually (but not necessarily) called with 'other_cpus' as its arg. 2677 * 2678 * - Signals all CPUs in map to stop. 2679 * - Waits for each to stop. 2680 * 2681 * Returns: 2682 * -1: error 2683 * 0: NA 2684 * 1: ok 2685 * 2686 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs 2687 * from executing at same time. 2688 */ 2689 int 2690 stop_cpus(cpumask_t map) 2691 { 2692 map &= smp_active_mask; 2693 2694 /* send the Xcpustop IPI to all CPUs in map */ 2695 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); 2696 2697 while ((stopped_cpus & map) != map) 2698 /* spin */ ; 2699 2700 return 1; 2701 } 2702 2703 2704 /* 2705 * Called by a CPU to restart stopped CPUs. 2706 * 2707 * Usually (but not necessarily) called with 'stopped_cpus' as its arg. 2708 * 2709 * - Signals all CPUs in map to restart. 2710 * - Waits for each to restart. 2711 * 2712 * Returns: 2713 * -1: error 2714 * 0: NA 2715 * 1: ok 2716 */ 2717 int 2718 restart_cpus(cpumask_t map) 2719 { 2720 /* signal other cpus to restart */ 2721 started_cpus = map & smp_active_mask; 2722 2723 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */ 2724 /* spin */ ; 2725 2726 return 1; 2727 } 2728 2729 /* 2730 * This is called once the mpboot code has gotten us properly relocated 2731 * and the MMU turned on, etc. ap_init() is actually the idle thread, 2732 * and when it returns the scheduler will call the real cpu_idle() main 2733 * loop for the idlethread. Interrupts are disabled on entry and should 2734 * remain disabled at return. 2735 */ 2736 void 2737 ap_init(void) 2738 { 2739 u_int apic_id; 2740 2741 /* 2742 * Adjust smp_startup_mask to signal the BSP that we have started 2743 * up successfully. Note that we do not yet hold the BGL. The BSP 2744 * is waiting for our signal. 2745 * 2746 * We can't set our bit in smp_active_mask yet because we are holding 2747 * interrupts physically disabled and remote cpus could deadlock 2748 * trying to send us an IPI. 2749 */ 2750 smp_startup_mask |= CPUMASK(mycpu->gd_cpuid); 2751 cpu_mfence(); 2752 2753 /* 2754 * Interlock for finalization. Wait until mp_finish is non-zero, 2755 * then get the MP lock. 2756 * 2757 * Note: We are in a critical section. 2758 * 2759 * Note: we are the idle thread, we can only spin. 2760 * 2761 * Note: The load fence is memory volatile and prevents the compiler 2762 * from improperly caching mp_finish, and the cpu from improperly 2763 * caching it. 2764 */ 2765 while (mp_finish == 0) 2766 cpu_lfence(); 2767 while (try_mplock() == 0) 2768 ; 2769 2770 if (cpu_feature & CPUID_TSC) { 2771 /* 2772 * The BSP is constantly updating tsc0_offset, figure out 2773 * the relative difference to synchronize ktrdump. 2774 */ 2775 tsc_offsets[mycpu->gd_cpuid] = rdtsc() - tsc0_offset; 2776 } 2777 2778 /* BSP may have changed PTD while we're waiting for the lock */ 2779 cpu_invltlb(); 2780 2781 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 2782 lidt(&r_idt); 2783 #endif 2784 2785 /* Build our map of 'other' CPUs. */ 2786 mycpu->gd_other_cpus = smp_startup_mask & ~CPUMASK(mycpu->gd_cpuid); 2787 2788 kprintf(" %d", mycpu->gd_cpuid); 2789 2790 /* A quick check from sanity claus */ 2791 apic_id = (apic_id_to_logical[(lapic->id & 0xff000000) >> 24]); 2792 if (mycpu->gd_cpuid != apic_id) { 2793 kprintf("SMP: cpuid = %d\n", mycpu->gd_cpuid); 2794 kprintf("SMP: apic_id = %d lapicid %d\n", 2795 apic_id, (lapic->id & 0xff000000) >> 24); 2796 #if JGXXX 2797 kprintf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); 2798 #endif 2799 panic("cpuid mismatch! boom!!"); 2800 } 2801 2802 /* Initialize AP's local APIC for irq's */ 2803 apic_initialize(FALSE); 2804 2805 /* Set memory range attributes for this CPU to match the BSP */ 2806 mem_range_AP_init(); 2807 2808 /* 2809 * Once we go active we must process any IPIQ messages that may 2810 * have been queued, because no actual IPI will occur until we 2811 * set our bit in the smp_active_mask. If we don't the IPI 2812 * message interlock could be left set which would also prevent 2813 * further IPIs. 2814 * 2815 * The idle loop doesn't expect the BGL to be held and while 2816 * lwkt_switch() normally cleans things up this is a special case 2817 * because we returning almost directly into the idle loop. 2818 * 2819 * The idle thread is never placed on the runq, make sure 2820 * nothing we've done put it there. 2821 */ 2822 KKASSERT(get_mplock_count(curthread) == 1); 2823 smp_active_mask |= CPUMASK(mycpu->gd_cpuid); 2824 2825 /* 2826 * Enable interrupts here. idle_restore will also do it, but 2827 * doing it here lets us clean up any strays that got posted to 2828 * the CPU during the AP boot while we are still in a critical 2829 * section. 2830 */ 2831 __asm __volatile("sti; pause; pause"::); 2832 mdcpu->gd_fpending = 0; 2833 2834 initclocks_pcpu(); /* clock interrupts (via IPIs) */ 2835 lwkt_process_ipiq(); 2836 2837 /* 2838 * Releasing the mp lock lets the BSP finish up the SMP init 2839 */ 2840 rel_mplock(); 2841 KKASSERT((curthread->td_flags & TDF_RUNQ) == 0); 2842 } 2843 2844 /* 2845 * Get SMP fully working before we start initializing devices. 2846 */ 2847 static 2848 void 2849 ap_finish(void) 2850 { 2851 mp_finish = 1; 2852 if (bootverbose) 2853 kprintf("Finish MP startup\n"); 2854 if (cpu_feature & CPUID_TSC) 2855 tsc0_offset = rdtsc(); 2856 tsc_offsets[0] = 0; 2857 rel_mplock(); 2858 while (smp_active_mask != smp_startup_mask) { 2859 cpu_lfence(); 2860 if (cpu_feature & CPUID_TSC) 2861 tsc0_offset = rdtsc(); 2862 } 2863 while (try_mplock() == 0) 2864 ; 2865 kprintf("\n"); 2866 if (bootverbose) { 2867 kprintf("Active CPU Mask: %016jx\n", 2868 (uintmax_t)smp_active_mask); 2869 } 2870 } 2871 2872 SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL) 2873 2874 void 2875 cpu_send_ipiq(int dcpu) 2876 { 2877 if (CPUMASK(dcpu) & smp_active_mask) 2878 single_apic_ipi(dcpu, XIPIQ_OFFSET, APIC_DELMODE_FIXED); 2879 } 2880 2881 #if 0 /* single_apic_ipi_passive() not working yet */ 2882 /* 2883 * Returns 0 on failure, 1 on success 2884 */ 2885 int 2886 cpu_send_ipiq_passive(int dcpu) 2887 { 2888 int r = 0; 2889 if (CPUMASK(dcpu) & smp_active_mask) { 2890 r = single_apic_ipi_passive(dcpu, XIPIQ_OFFSET, 2891 APIC_DELMODE_FIXED); 2892 } 2893 return(r); 2894 } 2895 #endif 2896 2897 struct mptable_lapic_cbarg1 { 2898 int cpu_count; 2899 int ht_fixup; 2900 u_int ht_apicid_mask; 2901 }; 2902 2903 static int 2904 mptable_lapic_pass1_callback(void *xarg, const void *pos, int type) 2905 { 2906 const struct PROCENTRY *ent; 2907 struct mptable_lapic_cbarg1 *arg = xarg; 2908 2909 if (type != 0) 2910 return 0; 2911 ent = pos; 2912 2913 if ((ent->cpu_flags & PROCENTRY_FLAG_EN) == 0) 2914 return 0; 2915 2916 arg->cpu_count++; 2917 if (ent->apic_id < 32) { 2918 arg->ht_apicid_mask |= 1 << ent->apic_id; 2919 } else if (arg->ht_fixup) { 2920 kprintf("MPTABLE: lapic id > 32, disable HTT fixup\n"); 2921 arg->ht_fixup = 0; 2922 } 2923 return 0; 2924 } 2925 2926 struct mptable_lapic_cbarg2 { 2927 int cpu; 2928 int logical_cpus; 2929 int found_bsp; 2930 }; 2931 2932 static int 2933 mptable_lapic_pass2_callback(void *xarg, const void *pos, int type) 2934 { 2935 const struct PROCENTRY *ent; 2936 struct mptable_lapic_cbarg2 *arg = xarg; 2937 2938 if (type != 0) 2939 return 0; 2940 ent = pos; 2941 2942 if (ent->cpu_flags & PROCENTRY_FLAG_BP) { 2943 KKASSERT(!arg->found_bsp); 2944 arg->found_bsp = 1; 2945 } 2946 2947 if (processor_entry(ent, arg->cpu)) 2948 arg->cpu++; 2949 2950 if (arg->logical_cpus) { 2951 struct PROCENTRY proc; 2952 int i; 2953 2954 /* 2955 * Create fake mptable processor entries 2956 * and feed them to processor_entry() to 2957 * enumerate the logical CPUs. 2958 */ 2959 bzero(&proc, sizeof(proc)); 2960 proc.type = 0; 2961 proc.cpu_flags = PROCENTRY_FLAG_EN; 2962 proc.apic_id = ent->apic_id; 2963 2964 for (i = 1; i < arg->logical_cpus; i++) { 2965 proc.apic_id++; 2966 processor_entry(&proc, arg->cpu); 2967 arg->cpu++; 2968 } 2969 } 2970 return 0; 2971 } 2972 2973 static void 2974 mptable_imcr(struct mptable_pos *mpt) 2975 { 2976 /* record whether PIC or virtual-wire mode */ 2977 machintr_setvar_simple(MACHINTR_VAR_IMCR_PRESENT, 2978 mpt->mp_fps->mpfb2 & 0x80); 2979 } 2980 2981 struct mptable_lapic_enumerator { 2982 struct lapic_enumerator enumerator; 2983 vm_paddr_t mpfps_paddr; 2984 }; 2985 2986 static void 2987 mptable_lapic_default(void) 2988 { 2989 int ap_apicid, bsp_apicid; 2990 2991 mp_naps = 1; /* exclude BSP */ 2992 2993 /* Map local apic before the id field is accessed */ 2994 lapic_init(DEFAULT_APIC_BASE); 2995 2996 bsp_apicid = APIC_ID(lapic->id); 2997 ap_apicid = (bsp_apicid == 0) ? 1 : 0; 2998 2999 /* BSP */ 3000 mp_set_cpuids(0, bsp_apicid); 3001 /* one and only AP */ 3002 mp_set_cpuids(1, ap_apicid); 3003 } 3004 3005 /* 3006 * Configure: 3007 * mp_naps 3008 * ID_TO_CPU(N), APIC ID to logical CPU table 3009 * CPU_TO_ID(N), logical CPU to APIC ID table 3010 */ 3011 static void 3012 mptable_lapic_enumerate(struct lapic_enumerator *e) 3013 { 3014 struct mptable_pos mpt; 3015 struct mptable_lapic_cbarg1 arg1; 3016 struct mptable_lapic_cbarg2 arg2; 3017 mpcth_t cth; 3018 int error, logical_cpus = 0; 3019 vm_offset_t lapic_addr; 3020 vm_paddr_t mpfps_paddr; 3021 3022 mpfps_paddr = ((struct mptable_lapic_enumerator *)e)->mpfps_paddr; 3023 KKASSERT(mpfps_paddr != 0); 3024 3025 error = mptable_map(&mpt, mpfps_paddr); 3026 if (error) 3027 panic("mptable_lapic_enumerate mptable_map failed\n"); 3028 3029 KKASSERT(mpt.mp_fps != NULL); 3030 3031 /* 3032 * Check for use of 'default' configuration 3033 */ 3034 if (mpt.mp_fps->mpfb1 != 0) { 3035 mptable_lapic_default(); 3036 mptable_unmap(&mpt); 3037 return; 3038 } 3039 3040 cth = mpt.mp_cth; 3041 KKASSERT(cth != NULL); 3042 3043 /* Save local apic address */ 3044 lapic_addr = (vm_offset_t)cth->apic_address; 3045 KKASSERT(lapic_addr != 0); 3046 3047 /* 3048 * Find out how many CPUs do we have 3049 */ 3050 bzero(&arg1, sizeof(arg1)); 3051 arg1.ht_fixup = 1; /* Apply ht fixup by default */ 3052 3053 error = mptable_iterate_entries(cth, 3054 mptable_lapic_pass1_callback, &arg1); 3055 if (error) 3056 panic("mptable_iterate_entries(lapic_pass1) failed\n"); 3057 KKASSERT(arg1.cpu_count != 0); 3058 3059 /* See if we need to fixup HT logical CPUs. */ 3060 if (arg1.ht_fixup) { 3061 logical_cpus = mptable_hyperthread_fixup(arg1.ht_apicid_mask, 3062 arg1.cpu_count); 3063 if (logical_cpus != 0) 3064 arg1.cpu_count *= logical_cpus; 3065 } 3066 mp_naps = arg1.cpu_count; 3067 3068 /* Qualify the numbers again, after possible HT fixup */ 3069 if (mp_naps > MAXCPU) { 3070 kprintf("Warning: only using %d of %d available CPUs!\n", 3071 MAXCPU, mp_naps); 3072 DELAY(1000000); 3073 mp_naps = MAXCPU; 3074 } 3075 3076 --mp_naps; /* subtract the BSP */ 3077 3078 /* 3079 * Link logical CPU id to local apic id 3080 */ 3081 bzero(&arg2, sizeof(arg2)); 3082 arg2.cpu = 1; 3083 arg2.logical_cpus = logical_cpus; 3084 3085 error = mptable_iterate_entries(cth, 3086 mptable_lapic_pass2_callback, &arg2); 3087 if (error) 3088 panic("mptable_iterate_entries(lapic_pass2) failed\n"); 3089 KKASSERT(arg2.found_bsp); 3090 3091 /* Map local apic */ 3092 lapic_init(lapic_addr); 3093 3094 mptable_unmap(&mpt); 3095 } 3096 3097 static int 3098 mptable_lapic_probe(struct lapic_enumerator *e) 3099 { 3100 vm_paddr_t mpfps_paddr; 3101 3102 mpfps_paddr = mptable_probe(); 3103 if (mpfps_paddr == 0) 3104 return ENXIO; 3105 3106 ((struct mptable_lapic_enumerator *)e)->mpfps_paddr = mpfps_paddr; 3107 return 0; 3108 } 3109 3110 static struct mptable_lapic_enumerator mptable_lapic_enumerator = { 3111 .enumerator = { 3112 .lapic_prio = LAPIC_ENUM_PRIO_MPTABLE, 3113 .lapic_probe = mptable_lapic_probe, 3114 .lapic_enumerate = mptable_lapic_enumerate 3115 } 3116 }; 3117 3118 static void 3119 mptable_apic_register(void) 3120 { 3121 lapic_enumerator_register(&mptable_lapic_enumerator.enumerator); 3122 } 3123 SYSINIT(madt, SI_BOOT2_PRESMP, SI_ORDER_ANY, mptable_apic_register, 0); 3124