1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. The name of the developer may NOT be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_apic.h" 32 #include "opt_cpu.h" 33 #include "opt_kstack_pages.h" 34 #include "opt_pmap.h" 35 #include "opt_sched.h" 36 #include "opt_smp.h" 37 38 #if !defined(lint) 39 #if !defined(SMP) 40 #error How did you get here? 41 #endif 42 43 #ifndef DEV_APIC 44 #error The apic device is required for SMP, add "device apic" to your config file. 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/bus.h> 51 #include <sys/cons.h> /* cngetc() */ 52 #include <sys/cpuset.h> 53 #ifdef GPROF 54 #include <sys/gmon.h> 55 #endif 56 #include <sys/kernel.h> 57 #include <sys/ktr.h> 58 #include <sys/lock.h> 59 #include <sys/malloc.h> 60 #include <sys/memrange.h> 61 #include <sys/mutex.h> 62 #include <sys/pcpu.h> 63 #include <sys/proc.h> 64 #include <sys/sched.h> 65 #include <sys/smp.h> 66 #include <sys/sysctl.h> 67 68 #include <vm/vm.h> 69 #include <vm/vm_param.h> 70 #include <vm/pmap.h> 71 #include <vm/vm_kern.h> 72 #include <vm/vm_extern.h> 73 74 #include <x86/apicreg.h> 75 #include <machine/clock.h> 76 #include <machine/cpu.h> 77 #include <machine/cputypes.h> 78 #include <x86/mca.h> 79 #include <machine/md_var.h> 80 #include <machine/pcb.h> 81 #include <machine/psl.h> 82 #include <machine/smp.h> 83 #include <machine/specialreg.h> 84 #include <x86/ucode.h> 85 86 #define WARMBOOT_TARGET 0 87 #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) 88 #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) 89 90 #define CMOS_REG (0x70) 91 #define CMOS_DATA (0x71) 92 #define BIOS_RESET (0x0f) 93 #define BIOS_WARM (0x0a) 94 95 /* 96 * this code MUST be enabled here and in mpboot.s. 97 * it follows the very early stages of AP boot by placing values in CMOS ram. 98 * it NORMALLY will never be needed and thus the primitive method for enabling. 99 * 100 #define CHECK_POINTS 101 */ 102 103 #if defined(CHECK_POINTS) 104 #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 105 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 106 107 #define CHECK_INIT(D); \ 108 CHECK_WRITE(0x34, (D)); \ 109 CHECK_WRITE(0x35, (D)); \ 110 CHECK_WRITE(0x36, (D)); \ 111 CHECK_WRITE(0x37, (D)); \ 112 CHECK_WRITE(0x38, (D)); \ 113 CHECK_WRITE(0x39, (D)); 114 115 #define CHECK_PRINT(S); \ 116 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 117 (S), \ 118 CHECK_READ(0x34), \ 119 CHECK_READ(0x35), \ 120 CHECK_READ(0x36), \ 121 CHECK_READ(0x37), \ 122 CHECK_READ(0x38), \ 123 CHECK_READ(0x39)); 124 125 #else /* CHECK_POINTS */ 126 127 #define CHECK_INIT(D) 128 #define CHECK_PRINT(S) 129 #define CHECK_WRITE(A, D) 130 131 #endif /* CHECK_POINTS */ 132 133 extern struct pcpu __pcpu[]; 134 135 /* 136 * Local data and functions. 137 */ 138 139 static void install_ap_tramp(void); 140 static int start_all_aps(void); 141 static int start_ap(int apic_id); 142 143 static char *ap_copyout_buf; 144 static char *ap_tramp_stack_base; 145 /* 146 * Initialize the IPI handlers and start up the AP's. 147 */ 148 void 149 cpu_mp_start(void) 150 { 151 int i; 152 153 /* Initialize the logical ID to APIC ID table. */ 154 for (i = 0; i < MAXCPU; i++) { 155 cpu_apic_ids[i] = -1; 156 cpu_ipi_pending[i] = 0; 157 } 158 159 /* Install an inter-CPU IPI for TLB invalidation */ 160 setidt(IPI_INVLTLB, IDTVEC(invltlb), 161 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 162 setidt(IPI_INVLPG, IDTVEC(invlpg), 163 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 164 setidt(IPI_INVLRNG, IDTVEC(invlrng), 165 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 166 167 /* Install an inter-CPU IPI for cache invalidation. */ 168 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 169 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 170 171 /* Install an inter-CPU IPI for all-CPU rendezvous */ 172 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 173 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 174 175 /* Install generic inter-CPU IPI handler */ 176 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 177 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 178 179 /* Install an inter-CPU IPI for CPU stop/restart */ 180 setidt(IPI_STOP, IDTVEC(cpustop), 181 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 182 183 /* Install an inter-CPU IPI for CPU suspend/resume */ 184 setidt(IPI_SUSPEND, IDTVEC(cpususpend), 185 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 186 187 /* Set boot_cpu_id if needed. */ 188 if (boot_cpu_id == -1) { 189 boot_cpu_id = PCPU_GET(apic_id); 190 cpu_info[boot_cpu_id].cpu_bsp = 1; 191 } else 192 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 193 ("BSP's APIC ID doesn't match boot_cpu_id")); 194 195 /* Probe logical/physical core configuration. */ 196 topo_probe(); 197 198 assign_cpu_ids(); 199 200 /* Start each Application Processor */ 201 start_all_aps(); 202 203 set_interrupt_apic_ids(); 204 } 205 206 /* 207 * AP CPU's call this to initialize themselves. 208 */ 209 void 210 init_secondary(void) 211 { 212 struct pcpu *pc; 213 struct i386tss *common_tssp; 214 struct region_descriptor r_gdt, r_idt; 215 int gsel_tss, myid, x; 216 u_int cr0; 217 218 /* bootAP is set in start_ap() to our ID. */ 219 myid = bootAP; 220 221 /* Update microcode before doing anything else. */ 222 ucode_load_ap(myid); 223 224 /* Get per-cpu data */ 225 pc = &__pcpu[myid]; 226 227 /* prime data page for it to use */ 228 pcpu_init(pc, myid, sizeof(struct pcpu)); 229 dpcpu_init(dpcpu, myid); 230 pc->pc_apic_id = cpu_apic_ids[myid]; 231 pc->pc_prvspace = pc; 232 pc->pc_curthread = 0; 233 pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid]; 234 235 fix_cpuid(); 236 237 gdt_segs[GPRIV_SEL].ssd_base = (int)pc; 238 gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp; 239 gdt_segs[GLDT_SEL].ssd_base = (int)ldt; 240 241 for (x = 0; x < NGDT; x++) { 242 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 243 } 244 245 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 246 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 247 lgdt(&r_gdt); /* does magic intra-segment return */ 248 249 r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; 250 r_idt.rd_base = (int)idt; 251 lidt(&r_idt); 252 253 lldt(_default_ldt); 254 PCPU_SET(currentldt, _default_ldt); 255 256 PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ - 257 VM86_STACK_SPACE); 258 259 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 260 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 261 common_tssp->tss_esp0 = PCPU_GET(trampstk); 262 common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 263 common_tssp->tss_ioopt = sizeof(struct i386tss) << 16; 264 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 265 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 266 ltr(gsel_tss); 267 268 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 269 PCPU_SET(copyout_buf, ap_copyout_buf); 270 271 /* 272 * Set to a known state: 273 * Set by mpboot.s: CR0_PG, CR0_PE 274 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 275 */ 276 cr0 = rcr0(); 277 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 278 load_cr0(cr0); 279 CHECK_WRITE(0x38, 5); 280 281 /* signal our startup to the BSP. */ 282 mp_naps++; 283 CHECK_WRITE(0x39, 6); 284 285 /* Spin until the BSP releases the AP's. */ 286 while (atomic_load_acq_int(&aps_ready) == 0) 287 ia32_pause(); 288 289 /* BSP may have changed PTD while we were waiting */ 290 invltlb(); 291 292 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 293 lidt(&r_idt); 294 #endif 295 296 init_secondary_tail(); 297 } 298 299 /* 300 * start each AP in our list 301 */ 302 #define TMPMAP_START 1 303 static int 304 start_all_aps(void) 305 { 306 u_char mpbiosreason; 307 u_int32_t mpbioswarmvec; 308 int apic_id, cpu; 309 310 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 311 312 pmap_remap_lower(true); 313 314 /* install the AP 1st level boot code */ 315 install_ap_tramp(); 316 317 /* save the current value of the warm-start vector */ 318 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 319 outb(CMOS_REG, BIOS_RESET); 320 mpbiosreason = inb(CMOS_DATA); 321 322 /* take advantage of the P==V mapping for PTD[0] for AP boot */ 323 324 /* start each AP */ 325 for (cpu = 1; cpu < mp_ncpus; cpu++) { 326 apic_id = cpu_apic_ids[cpu]; 327 328 /* allocate and set up a boot stack data page */ 329 bootstacks[cpu] = (char *)kmem_malloc(kstack_pages * PAGE_SIZE, 330 M_WAITOK | M_ZERO); 331 dpcpu = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); 332 /* setup a vector to our boot code */ 333 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 334 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 335 outb(CMOS_REG, BIOS_RESET); 336 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 337 338 bootSTK = (char *)bootstacks[cpu] + kstack_pages * 339 PAGE_SIZE - 4; 340 bootAP = cpu; 341 342 ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); 343 ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); 344 345 /* attempt to start the Application Processor */ 346 CHECK_INIT(99); /* setup checkpoints */ 347 if (!start_ap(apic_id)) { 348 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 349 CHECK_PRINT("trace"); /* show checkpoints */ 350 /* better panic as the AP may be running loose */ 351 printf("panic y/n? [y] "); 352 if (cngetc() != 'n') 353 panic("bye-bye"); 354 } 355 CHECK_PRINT("trace"); /* show checkpoints */ 356 357 CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 358 } 359 360 pmap_remap_lower(false); 361 362 /* restore the warmstart vector */ 363 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 364 365 outb(CMOS_REG, BIOS_RESET); 366 outb(CMOS_DATA, mpbiosreason); 367 368 /* number of APs actually started */ 369 return mp_naps; 370 } 371 372 /* 373 * load the 1st level AP boot code into base memory. 374 */ 375 376 /* targets for relocation */ 377 extern void bigJump(void); 378 extern void bootCodeSeg(void); 379 extern void bootDataSeg(void); 380 extern void MPentry(void); 381 extern u_int MP_GDT; 382 extern u_int mp_gdtbase; 383 384 static void 385 install_ap_tramp(void) 386 { 387 int x; 388 int size = *(int *) ((u_long) & bootMP_size); 389 vm_offset_t va = boot_address; 390 u_char *src = (u_char *) ((u_long) bootMP); 391 u_char *dst = (u_char *) va; 392 u_int boot_base = (u_int) bootMP; 393 u_int8_t *dst8; 394 u_int16_t *dst16; 395 u_int32_t *dst32; 396 397 KASSERT (size <= PAGE_SIZE, 398 ("'size' do not fit into PAGE_SIZE, as expected.")); 399 pmap_kenter(va, boot_address); 400 pmap_invalidate_page (kernel_pmap, va); 401 for (x = 0; x < size; ++x) 402 *dst++ = *src++; 403 404 /* 405 * modify addresses in code we just moved to basemem. unfortunately we 406 * need fairly detailed info about mpboot.s for this to work. changes 407 * to mpboot.s might require changes here. 408 */ 409 410 /* boot code is located in KERNEL space */ 411 dst = (u_char *) va; 412 413 /* modify the lgdt arg */ 414 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 415 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 416 417 /* modify the ljmp target for MPentry() */ 418 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 419 *dst32 = (u_int)MPentry; 420 421 /* modify the target for boot code segment */ 422 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 423 dst8 = (u_int8_t *) (dst16 + 1); 424 *dst16 = (u_int) boot_address & 0xffff; 425 *dst8 = ((u_int) boot_address >> 16) & 0xff; 426 427 /* modify the target for boot data segment */ 428 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 429 dst8 = (u_int8_t *) (dst16 + 1); 430 *dst16 = (u_int) boot_address & 0xffff; 431 *dst8 = ((u_int) boot_address >> 16) & 0xff; 432 } 433 434 /* 435 * This function starts the AP (application processor) identified 436 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 437 * to accomplish this. This is necessary because of the nuances 438 * of the different hardware we might encounter. It isn't pretty, 439 * but it seems to work. 440 */ 441 static int 442 start_ap(int apic_id) 443 { 444 int vector, ms; 445 int cpus; 446 447 /* calculate the vector */ 448 vector = (boot_address >> 12) & 0xff; 449 450 /* used as a watchpoint to signal AP startup */ 451 cpus = mp_naps; 452 453 ipi_startup(apic_id, vector); 454 455 /* Wait up to 5 seconds for it to start. */ 456 for (ms = 0; ms < 5000; ms++) { 457 if (mp_naps > cpus) 458 return 1; /* return SUCCESS */ 459 DELAY(1000); 460 } 461 return 0; /* return FAILURE */ 462 } 463