1 /*- 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include <sys/cdefs.h> 27 __FBSDID("$FreeBSD$"); 28 29 #include "opt_apic.h" 30 #include "opt_cpu.h" 31 #include "opt_kstack_pages.h" 32 #include "opt_pmap.h" 33 #include "opt_sched.h" 34 #include "opt_smp.h" 35 36 #if !defined(lint) 37 #if !defined(SMP) 38 #error How did you get here? 39 #endif 40 41 #ifndef DEV_APIC 42 #error The apic device is required for SMP, add "device apic" to your config file. 43 #endif 44 #endif /* not lint */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/bus.h> 49 #include <sys/cons.h> /* cngetc() */ 50 #include <sys/cpuset.h> 51 #ifdef GPROF 52 #include <sys/gmon.h> 53 #endif 54 #include <sys/kernel.h> 55 #include <sys/ktr.h> 56 #include <sys/lock.h> 57 #include <sys/malloc.h> 58 #include <sys/memrange.h> 59 #include <sys/mutex.h> 60 #include <sys/pcpu.h> 61 #include <sys/proc.h> 62 #include <sys/sched.h> 63 #include <sys/smp.h> 64 #include <sys/sysctl.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_param.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_kern.h> 70 #include <vm/vm_extern.h> 71 72 #include <x86/apicreg.h> 73 #include <machine/clock.h> 74 #include <machine/cputypes.h> 75 #include <x86/mca.h> 76 #include <machine/md_var.h> 77 #include <machine/pcb.h> 78 #include <machine/psl.h> 79 #include <machine/smp.h> 80 #include <machine/specialreg.h> 81 #include <machine/cpu.h> 82 83 #define WARMBOOT_TARGET 0 84 #define WARMBOOT_OFF (KERNBASE + 0x0467) 85 #define WARMBOOT_SEG (KERNBASE + 0x0469) 86 87 #define CMOS_REG (0x70) 88 #define CMOS_DATA (0x71) 89 #define BIOS_RESET (0x0f) 90 #define BIOS_WARM (0x0a) 91 92 /* 93 * this code MUST be enabled here and in mpboot.s. 94 * it follows the very early stages of AP boot by placing values in CMOS ram. 95 * it NORMALLY will never be needed and thus the primitive method for enabling. 96 * 97 #define CHECK_POINTS 98 */ 99 100 #if defined(CHECK_POINTS) 101 #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 102 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 103 104 #define CHECK_INIT(D); \ 105 CHECK_WRITE(0x34, (D)); \ 106 CHECK_WRITE(0x35, (D)); \ 107 CHECK_WRITE(0x36, (D)); \ 108 CHECK_WRITE(0x37, (D)); \ 109 CHECK_WRITE(0x38, (D)); \ 110 CHECK_WRITE(0x39, (D)); 111 112 #define CHECK_PRINT(S); \ 113 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 114 (S), \ 115 CHECK_READ(0x34), \ 116 CHECK_READ(0x35), \ 117 CHECK_READ(0x36), \ 118 CHECK_READ(0x37), \ 119 CHECK_READ(0x38), \ 120 CHECK_READ(0x39)); 121 122 #else /* CHECK_POINTS */ 123 124 #define CHECK_INIT(D) 125 #define CHECK_PRINT(S) 126 #define CHECK_WRITE(A, D) 127 128 #endif /* CHECK_POINTS */ 129 130 extern struct pcpu __pcpu[]; 131 132 /* 133 * Local data and functions. 134 */ 135 136 static void install_ap_tramp(void); 137 static int start_all_aps(void); 138 static int start_ap(int apic_id); 139 140 static u_int boot_address; 141 142 /* 143 * Calculate usable address in base memory for AP trampoline code. 144 */ 145 u_int 146 mp_bootaddress(u_int basemem) 147 { 148 149 boot_address = trunc_page(basemem); /* round down to 4k boundary */ 150 if ((basemem - boot_address) < bootMP_size) 151 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 152 153 return boot_address; 154 } 155 156 /* 157 * Initialize the IPI handlers and start up the AP's. 158 */ 159 void 160 cpu_mp_start(void) 161 { 162 int i; 163 164 /* Initialize the logical ID to APIC ID table. */ 165 for (i = 0; i < MAXCPU; i++) { 166 cpu_apic_ids[i] = -1; 167 cpu_ipi_pending[i] = 0; 168 } 169 170 /* Install an inter-CPU IPI for TLB invalidation */ 171 setidt(IPI_INVLTLB, IDTVEC(invltlb), 172 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 173 setidt(IPI_INVLPG, IDTVEC(invlpg), 174 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 175 setidt(IPI_INVLRNG, IDTVEC(invlrng), 176 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 177 178 /* Install an inter-CPU IPI for cache invalidation. */ 179 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 180 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 181 182 /* Install an inter-CPU IPI for all-CPU rendezvous */ 183 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 184 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 185 186 /* Install generic inter-CPU IPI handler */ 187 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 188 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 189 190 /* Install an inter-CPU IPI for CPU stop/restart */ 191 setidt(IPI_STOP, IDTVEC(cpustop), 192 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 193 194 /* Install an inter-CPU IPI for CPU suspend/resume */ 195 setidt(IPI_SUSPEND, IDTVEC(cpususpend), 196 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 197 198 /* Set boot_cpu_id if needed. */ 199 if (boot_cpu_id == -1) { 200 boot_cpu_id = PCPU_GET(apic_id); 201 cpu_info[boot_cpu_id].cpu_bsp = 1; 202 } else 203 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 204 ("BSP's APIC ID doesn't match boot_cpu_id")); 205 206 /* Probe logical/physical core configuration. */ 207 topo_probe(); 208 209 assign_cpu_ids(); 210 211 /* Start each Application Processor */ 212 start_all_aps(); 213 214 set_interrupt_apic_ids(); 215 } 216 217 /* 218 * AP CPU's call this to initialize themselves. 219 */ 220 void 221 init_secondary(void) 222 { 223 struct pcpu *pc; 224 vm_offset_t addr; 225 int gsel_tss; 226 int x, myid; 227 u_int cr0; 228 229 /* bootAP is set in start_ap() to our ID. */ 230 myid = bootAP; 231 232 /* Get per-cpu data */ 233 pc = &__pcpu[myid]; 234 235 /* prime data page for it to use */ 236 pcpu_init(pc, myid, sizeof(struct pcpu)); 237 dpcpu_init(dpcpu, myid); 238 pc->pc_apic_id = cpu_apic_ids[myid]; 239 pc->pc_prvspace = pc; 240 pc->pc_curthread = 0; 241 242 fix_cpuid(); 243 244 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 245 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 246 247 for (x = 0; x < NGDT; x++) { 248 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 249 } 250 251 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 252 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 253 lgdt(&r_gdt); /* does magic intra-segment return */ 254 255 lidt(&r_idt); 256 257 lldt(_default_ldt); 258 PCPU_SET(currentldt, _default_ldt); 259 260 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 261 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 262 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 263 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 264 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 265 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 266 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 267 ltr(gsel_tss); 268 269 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 270 271 /* 272 * Set to a known state: 273 * Set by mpboot.s: CR0_PG, CR0_PE 274 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 275 */ 276 cr0 = rcr0(); 277 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 278 load_cr0(cr0); 279 CHECK_WRITE(0x38, 5); 280 281 /* signal our startup to the BSP. */ 282 mp_naps++; 283 CHECK_WRITE(0x39, 6); 284 285 /* Spin until the BSP releases the AP's. */ 286 while (atomic_load_acq_int(&aps_ready) == 0) 287 ia32_pause(); 288 289 /* BSP may have changed PTD while we were waiting */ 290 invltlb(); 291 for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 292 invlpg(addr); 293 294 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 295 lidt(&r_idt); 296 #endif 297 298 init_secondary_tail(); 299 } 300 301 /* 302 * start each AP in our list 303 */ 304 /* Lowest 1MB is already mapped: don't touch*/ 305 #define TMPMAP_START 1 306 static int 307 start_all_aps(void) 308 { 309 u_char mpbiosreason; 310 u_int32_t mpbioswarmvec; 311 int apic_id, cpu, i; 312 313 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 314 315 /* install the AP 1st level boot code */ 316 install_ap_tramp(); 317 318 /* save the current value of the warm-start vector */ 319 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 320 outb(CMOS_REG, BIOS_RESET); 321 mpbiosreason = inb(CMOS_DATA); 322 323 /* set up temporary P==V mapping for AP boot */ 324 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 325 for (i = TMPMAP_START; i < NKPT; i++) 326 PTD[i] = PTD[KPTDI + i]; 327 invltlb(); 328 329 /* start each AP */ 330 for (cpu = 1; cpu < mp_ncpus; cpu++) { 331 apic_id = cpu_apic_ids[cpu]; 332 333 /* allocate and set up a boot stack data page */ 334 bootstacks[cpu] = 335 (char *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE, 336 M_WAITOK | M_ZERO); 337 dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, 338 M_WAITOK | M_ZERO); 339 /* setup a vector to our boot code */ 340 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 341 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 342 outb(CMOS_REG, BIOS_RESET); 343 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 344 345 bootSTK = (char *)bootstacks[cpu] + kstack_pages * 346 PAGE_SIZE - 4; 347 bootAP = cpu; 348 349 /* attempt to start the Application Processor */ 350 CHECK_INIT(99); /* setup checkpoints */ 351 if (!start_ap(apic_id)) { 352 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 353 CHECK_PRINT("trace"); /* show checkpoints */ 354 /* better panic as the AP may be running loose */ 355 printf("panic y/n? [y] "); 356 if (cngetc() != 'n') 357 panic("bye-bye"); 358 } 359 CHECK_PRINT("trace"); /* show checkpoints */ 360 361 CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 362 } 363 364 /* restore the warmstart vector */ 365 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 366 367 outb(CMOS_REG, BIOS_RESET); 368 outb(CMOS_DATA, mpbiosreason); 369 370 /* Undo V==P hack from above */ 371 for (i = TMPMAP_START; i < NKPT; i++) 372 PTD[i] = 0; 373 pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 374 375 /* number of APs actually started */ 376 return mp_naps; 377 } 378 379 /* 380 * load the 1st level AP boot code into base memory. 381 */ 382 383 /* targets for relocation */ 384 extern void bigJump(void); 385 extern void bootCodeSeg(void); 386 extern void bootDataSeg(void); 387 extern void MPentry(void); 388 extern u_int MP_GDT; 389 extern u_int mp_gdtbase; 390 391 static void 392 install_ap_tramp(void) 393 { 394 int x; 395 int size = *(int *) ((u_long) & bootMP_size); 396 vm_offset_t va = boot_address + KERNBASE; 397 u_char *src = (u_char *) ((u_long) bootMP); 398 u_char *dst = (u_char *) va; 399 u_int boot_base = (u_int) bootMP; 400 u_int8_t *dst8; 401 u_int16_t *dst16; 402 u_int32_t *dst32; 403 404 KASSERT (size <= PAGE_SIZE, 405 ("'size' do not fit into PAGE_SIZE, as expected.")); 406 pmap_kenter(va, boot_address); 407 pmap_invalidate_page (kernel_pmap, va); 408 for (x = 0; x < size; ++x) 409 *dst++ = *src++; 410 411 /* 412 * modify addresses in code we just moved to basemem. unfortunately we 413 * need fairly detailed info about mpboot.s for this to work. changes 414 * to mpboot.s might require changes here. 415 */ 416 417 /* boot code is located in KERNEL space */ 418 dst = (u_char *) va; 419 420 /* modify the lgdt arg */ 421 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 422 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 423 424 /* modify the ljmp target for MPentry() */ 425 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 426 *dst32 = ((u_int) MPentry - KERNBASE); 427 428 /* modify the target for boot code segment */ 429 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 430 dst8 = (u_int8_t *) (dst16 + 1); 431 *dst16 = (u_int) boot_address & 0xffff; 432 *dst8 = ((u_int) boot_address >> 16) & 0xff; 433 434 /* modify the target for boot data segment */ 435 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 436 dst8 = (u_int8_t *) (dst16 + 1); 437 *dst16 = (u_int) boot_address & 0xffff; 438 *dst8 = ((u_int) boot_address >> 16) & 0xff; 439 } 440 441 /* 442 * This function starts the AP (application processor) identified 443 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 444 * to accomplish this. This is necessary because of the nuances 445 * of the different hardware we might encounter. It isn't pretty, 446 * but it seems to work. 447 */ 448 static int 449 start_ap(int apic_id) 450 { 451 int vector, ms; 452 int cpus; 453 454 /* calculate the vector */ 455 vector = (boot_address >> 12) & 0xff; 456 457 /* used as a watchpoint to signal AP startup */ 458 cpus = mp_naps; 459 460 ipi_startup(apic_id, vector); 461 462 /* Wait up to 5 seconds for it to start. */ 463 for (ms = 0; ms < 5000; ms++) { 464 if (mp_naps > cpus) 465 return 1; /* return SUCCESS */ 466 DELAY(1000); 467 } 468 return 0; /* return FAILURE */ 469 } 470