1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. The name of the developer may NOT be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_apic.h" 32 #include "opt_cpu.h" 33 #include "opt_kstack_pages.h" 34 #include "opt_pmap.h" 35 #include "opt_sched.h" 36 #include "opt_smp.h" 37 38 #if !defined(lint) 39 #if !defined(SMP) 40 #error How did you get here? 41 #endif 42 43 #ifndef DEV_APIC 44 #error The apic device is required for SMP, add "device apic" to your config file. 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/bus.h> 51 #include <sys/cons.h> /* cngetc() */ 52 #include <sys/cpuset.h> 53 #ifdef GPROF 54 #include <sys/gmon.h> 55 #endif 56 #include <sys/kernel.h> 57 #include <sys/ktr.h> 58 #include <sys/lock.h> 59 #include <sys/malloc.h> 60 #include <sys/memrange.h> 61 #include <sys/mutex.h> 62 #include <sys/pcpu.h> 63 #include <sys/proc.h> 64 #include <sys/sched.h> 65 #include <sys/smp.h> 66 #include <sys/sysctl.h> 67 68 #include <vm/vm.h> 69 #include <vm/vm_param.h> 70 #include <vm/pmap.h> 71 #include <vm/vm_kern.h> 72 #include <vm/vm_extern.h> 73 74 #include <x86/apicreg.h> 75 #include <machine/clock.h> 76 #include <machine/cpu.h> 77 #include <machine/cputypes.h> 78 #include <x86/mca.h> 79 #include <machine/md_var.h> 80 #include <machine/pcb.h> 81 #include <machine/psl.h> 82 #include <machine/smp.h> 83 #include <machine/specialreg.h> 84 #include <x86/ucode.h> 85 86 #define WARMBOOT_TARGET 0 87 #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) 88 #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) 89 90 #define CMOS_REG (0x70) 91 #define CMOS_DATA (0x71) 92 #define BIOS_RESET (0x0f) 93 #define BIOS_WARM (0x0a) 94 95 /* 96 * this code MUST be enabled here and in mpboot.s. 97 * it follows the very early stages of AP boot by placing values in CMOS ram. 98 * it NORMALLY will never be needed and thus the primitive method for enabling. 99 * 100 #define CHECK_POINTS 101 */ 102 103 #if defined(CHECK_POINTS) 104 #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 105 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 106 107 #define CHECK_INIT(D); \ 108 CHECK_WRITE(0x34, (D)); \ 109 CHECK_WRITE(0x35, (D)); \ 110 CHECK_WRITE(0x36, (D)); \ 111 CHECK_WRITE(0x37, (D)); \ 112 CHECK_WRITE(0x38, (D)); \ 113 CHECK_WRITE(0x39, (D)); 114 115 #define CHECK_PRINT(S); \ 116 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 117 (S), \ 118 CHECK_READ(0x34), \ 119 CHECK_READ(0x35), \ 120 CHECK_READ(0x36), \ 121 CHECK_READ(0x37), \ 122 CHECK_READ(0x38), \ 123 CHECK_READ(0x39)); 124 125 #else /* CHECK_POINTS */ 126 127 #define CHECK_INIT(D) 128 #define CHECK_PRINT(S) 129 #define CHECK_WRITE(A, D) 130 131 #endif /* CHECK_POINTS */ 132 133 /* 134 * Local data and functions. 135 */ 136 137 static void install_ap_tramp(void); 138 static int start_all_aps(void); 139 static int start_ap(int apic_id); 140 141 static char *ap_copyout_buf; 142 static char *ap_tramp_stack_base; 143 /* 144 * Initialize the IPI handlers and start up the AP's. 145 */ 146 void 147 cpu_mp_start(void) 148 { 149 int i; 150 151 /* Initialize the logical ID to APIC ID table. */ 152 for (i = 0; i < MAXCPU; i++) { 153 cpu_apic_ids[i] = -1; 154 } 155 156 /* Install an inter-CPU IPI for TLB invalidation */ 157 setidt(IPI_INVLTLB, IDTVEC(invltlb), 158 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 159 setidt(IPI_INVLPG, IDTVEC(invlpg), 160 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 161 setidt(IPI_INVLRNG, IDTVEC(invlrng), 162 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 163 164 /* Install an inter-CPU IPI for cache invalidation. */ 165 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 166 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 167 168 /* Install an inter-CPU IPI for all-CPU rendezvous */ 169 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 170 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 171 172 /* Install generic inter-CPU IPI handler */ 173 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 174 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 175 176 /* Install an inter-CPU IPI for CPU stop/restart */ 177 setidt(IPI_STOP, IDTVEC(cpustop), 178 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 179 180 /* Install an inter-CPU IPI for CPU suspend/resume */ 181 setidt(IPI_SUSPEND, IDTVEC(cpususpend), 182 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 183 184 /* Set boot_cpu_id if needed. */ 185 if (boot_cpu_id == -1) { 186 boot_cpu_id = PCPU_GET(apic_id); 187 cpu_info[boot_cpu_id].cpu_bsp = 1; 188 } else 189 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 190 ("BSP's APIC ID doesn't match boot_cpu_id")); 191 192 /* Probe logical/physical core configuration. */ 193 topo_probe(); 194 195 assign_cpu_ids(); 196 197 /* Start each Application Processor */ 198 start_all_aps(); 199 200 set_interrupt_apic_ids(); 201 } 202 203 /* 204 * AP CPU's call this to initialize themselves. 205 */ 206 void 207 init_secondary(void) 208 { 209 struct pcpu *pc; 210 struct i386tss *common_tssp; 211 struct region_descriptor r_gdt, r_idt; 212 int gsel_tss, myid, x; 213 u_int cr0; 214 215 /* bootAP is set in start_ap() to our ID. */ 216 myid = bootAP; 217 218 /* Update microcode before doing anything else. */ 219 ucode_load_ap(myid); 220 221 /* Get per-cpu data */ 222 pc = &__pcpu[myid]; 223 224 /* prime data page for it to use */ 225 pcpu_init(pc, myid, sizeof(struct pcpu)); 226 dpcpu_init(dpcpu, myid); 227 pc->pc_apic_id = cpu_apic_ids[myid]; 228 pc->pc_prvspace = pc; 229 pc->pc_curthread = 0; 230 pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid]; 231 232 fix_cpuid(); 233 234 gdt_segs[GPRIV_SEL].ssd_base = (int)pc; 235 gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp; 236 gdt_segs[GLDT_SEL].ssd_base = (int)ldt; 237 238 for (x = 0; x < NGDT; x++) { 239 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 240 } 241 242 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 243 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 244 lgdt(&r_gdt); /* does magic intra-segment return */ 245 246 r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; 247 r_idt.rd_base = (int)idt; 248 lidt(&r_idt); 249 250 lldt(_default_ldt); 251 PCPU_SET(currentldt, _default_ldt); 252 253 PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ - 254 VM86_STACK_SPACE); 255 256 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 257 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 258 common_tssp->tss_esp0 = PCPU_GET(trampstk); 259 common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 260 common_tssp->tss_ioopt = sizeof(struct i386tss) << 16; 261 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 262 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 263 ltr(gsel_tss); 264 265 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 266 PCPU_SET(copyout_buf, ap_copyout_buf); 267 268 /* 269 * Set to a known state: 270 * Set by mpboot.s: CR0_PG, CR0_PE 271 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 272 */ 273 cr0 = rcr0(); 274 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 275 load_cr0(cr0); 276 CHECK_WRITE(0x38, 5); 277 278 /* signal our startup to the BSP. */ 279 mp_naps++; 280 CHECK_WRITE(0x39, 6); 281 282 /* Spin until the BSP releases the AP's. */ 283 while (atomic_load_acq_int(&aps_ready) == 0) 284 ia32_pause(); 285 286 /* BSP may have changed PTD while we were waiting */ 287 invltlb(); 288 289 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 290 lidt(&r_idt); 291 #endif 292 293 init_secondary_tail(); 294 } 295 296 /* 297 * start each AP in our list 298 */ 299 #define TMPMAP_START 1 300 static int 301 start_all_aps(void) 302 { 303 u_char mpbiosreason; 304 u_int32_t mpbioswarmvec; 305 int apic_id, cpu; 306 307 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 308 309 pmap_remap_lower(true); 310 311 /* install the AP 1st level boot code */ 312 install_ap_tramp(); 313 314 /* save the current value of the warm-start vector */ 315 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 316 outb(CMOS_REG, BIOS_RESET); 317 mpbiosreason = inb(CMOS_DATA); 318 319 /* take advantage of the P==V mapping for PTD[0] for AP boot */ 320 321 /* start each AP */ 322 for (cpu = 1; cpu < mp_ncpus; cpu++) { 323 apic_id = cpu_apic_ids[cpu]; 324 325 /* allocate and set up a boot stack data page */ 326 bootstacks[cpu] = (char *)kmem_malloc(kstack_pages * PAGE_SIZE, 327 M_WAITOK | M_ZERO); 328 dpcpu = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); 329 /* setup a vector to our boot code */ 330 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 331 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 332 outb(CMOS_REG, BIOS_RESET); 333 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 334 335 bootSTK = (char *)bootstacks[cpu] + kstack_pages * 336 PAGE_SIZE - 4; 337 bootAP = cpu; 338 339 ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); 340 ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); 341 342 /* attempt to start the Application Processor */ 343 CHECK_INIT(99); /* setup checkpoints */ 344 if (!start_ap(apic_id)) { 345 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 346 CHECK_PRINT("trace"); /* show checkpoints */ 347 /* better panic as the AP may be running loose */ 348 printf("panic y/n? [y] "); 349 if (cngetc() != 'n') 350 panic("bye-bye"); 351 } 352 CHECK_PRINT("trace"); /* show checkpoints */ 353 354 CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 355 } 356 357 pmap_remap_lower(false); 358 359 /* restore the warmstart vector */ 360 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 361 362 outb(CMOS_REG, BIOS_RESET); 363 outb(CMOS_DATA, mpbiosreason); 364 365 /* number of APs actually started */ 366 return mp_naps; 367 } 368 369 /* 370 * load the 1st level AP boot code into base memory. 371 */ 372 373 /* targets for relocation */ 374 extern void bigJump(void); 375 extern void bootCodeSeg(void); 376 extern void bootDataSeg(void); 377 extern void MPentry(void); 378 extern u_int MP_GDT; 379 extern u_int mp_gdtbase; 380 381 static void 382 install_ap_tramp(void) 383 { 384 int x; 385 int size = *(int *) ((u_long) & bootMP_size); 386 vm_offset_t va = boot_address; 387 u_char *src = (u_char *) ((u_long) bootMP); 388 u_char *dst = (u_char *) va; 389 u_int boot_base = (u_int) bootMP; 390 u_int8_t *dst8; 391 u_int16_t *dst16; 392 u_int32_t *dst32; 393 394 KASSERT (size <= PAGE_SIZE, 395 ("'size' do not fit into PAGE_SIZE, as expected.")); 396 pmap_kenter(va, boot_address); 397 pmap_invalidate_page (kernel_pmap, va); 398 for (x = 0; x < size; ++x) 399 *dst++ = *src++; 400 401 /* 402 * modify addresses in code we just moved to basemem. unfortunately we 403 * need fairly detailed info about mpboot.s for this to work. changes 404 * to mpboot.s might require changes here. 405 */ 406 407 /* boot code is located in KERNEL space */ 408 dst = (u_char *) va; 409 410 /* modify the lgdt arg */ 411 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 412 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 413 414 /* modify the ljmp target for MPentry() */ 415 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 416 *dst32 = (u_int)MPentry; 417 418 /* modify the target for boot code segment */ 419 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 420 dst8 = (u_int8_t *) (dst16 + 1); 421 *dst16 = (u_int) boot_address & 0xffff; 422 *dst8 = ((u_int) boot_address >> 16) & 0xff; 423 424 /* modify the target for boot data segment */ 425 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 426 dst8 = (u_int8_t *) (dst16 + 1); 427 *dst16 = (u_int) boot_address & 0xffff; 428 *dst8 = ((u_int) boot_address >> 16) & 0xff; 429 } 430 431 /* 432 * This function starts the AP (application processor) identified 433 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 434 * to accomplish this. This is necessary because of the nuances 435 * of the different hardware we might encounter. It isn't pretty, 436 * but it seems to work. 437 */ 438 static int 439 start_ap(int apic_id) 440 { 441 int vector, ms; 442 int cpus; 443 444 /* calculate the vector */ 445 vector = (boot_address >> 12) & 0xff; 446 447 /* used as a watchpoint to signal AP startup */ 448 cpus = mp_naps; 449 450 ipi_startup(apic_id, vector); 451 452 /* Wait up to 5 seconds for it to start. */ 453 for (ms = 0; ms < 5000; ms++) { 454 if (mp_naps > cpus) 455 return 1; /* return SUCCESS */ 456 DELAY(1000); 457 } 458 return 0; /* return FAILURE */ 459 } 460