1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1996, by Steve Passe 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. The name of the developer may NOT be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_apic.h" 32 #include "opt_cpu.h" 33 #include "opt_kstack_pages.h" 34 #include "opt_pmap.h" 35 #include "opt_sched.h" 36 #include "opt_smp.h" 37 38 #if !defined(lint) 39 #if !defined(SMP) 40 #error How did you get here? 41 #endif 42 43 #ifndef DEV_APIC 44 #error The apic device is required for SMP, add "device apic" to your config file. 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/bus.h> 51 #include <sys/cons.h> /* cngetc() */ 52 #include <sys/cpuset.h> 53 #ifdef GPROF 54 #include <sys/gmon.h> 55 #endif 56 #include <sys/kernel.h> 57 #include <sys/ktr.h> 58 #include <sys/lock.h> 59 #include <sys/malloc.h> 60 #include <sys/memrange.h> 61 #include <sys/mutex.h> 62 #include <sys/pcpu.h> 63 #include <sys/proc.h> 64 #include <sys/sched.h> 65 #include <sys/smp.h> 66 #include <sys/sysctl.h> 67 68 #include <vm/vm.h> 69 #include <vm/vm_param.h> 70 #include <vm/pmap.h> 71 #include <vm/vm_kern.h> 72 #include <vm/vm_extern.h> 73 74 #include <x86/apicreg.h> 75 #include <machine/clock.h> 76 #include <machine/cputypes.h> 77 #include <x86/mca.h> 78 #include <machine/md_var.h> 79 #include <machine/pcb.h> 80 #include <machine/psl.h> 81 #include <machine/smp.h> 82 #include <machine/specialreg.h> 83 #include <machine/cpu.h> 84 85 #define WARMBOOT_TARGET 0 86 #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) 87 #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) 88 89 #define CMOS_REG (0x70) 90 #define CMOS_DATA (0x71) 91 #define BIOS_RESET (0x0f) 92 #define BIOS_WARM (0x0a) 93 94 /* 95 * this code MUST be enabled here and in mpboot.s. 96 * it follows the very early stages of AP boot by placing values in CMOS ram. 97 * it NORMALLY will never be needed and thus the primitive method for enabling. 98 * 99 #define CHECK_POINTS 100 */ 101 102 #if defined(CHECK_POINTS) 103 #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 104 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 105 106 #define CHECK_INIT(D); \ 107 CHECK_WRITE(0x34, (D)); \ 108 CHECK_WRITE(0x35, (D)); \ 109 CHECK_WRITE(0x36, (D)); \ 110 CHECK_WRITE(0x37, (D)); \ 111 CHECK_WRITE(0x38, (D)); \ 112 CHECK_WRITE(0x39, (D)); 113 114 #define CHECK_PRINT(S); \ 115 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 116 (S), \ 117 CHECK_READ(0x34), \ 118 CHECK_READ(0x35), \ 119 CHECK_READ(0x36), \ 120 CHECK_READ(0x37), \ 121 CHECK_READ(0x38), \ 122 CHECK_READ(0x39)); 123 124 #else /* CHECK_POINTS */ 125 126 #define CHECK_INIT(D) 127 #define CHECK_PRINT(S) 128 #define CHECK_WRITE(A, D) 129 130 #endif /* CHECK_POINTS */ 131 132 extern struct pcpu __pcpu[]; 133 134 /* 135 * Local data and functions. 136 */ 137 138 static void install_ap_tramp(void); 139 static int start_all_aps(void); 140 static int start_ap(int apic_id); 141 142 static char *ap_copyout_buf; 143 static char *ap_tramp_stack_base; 144 /* 145 * Initialize the IPI handlers and start up the AP's. 146 */ 147 void 148 cpu_mp_start(void) 149 { 150 int i; 151 152 /* Initialize the logical ID to APIC ID table. */ 153 for (i = 0; i < MAXCPU; i++) { 154 cpu_apic_ids[i] = -1; 155 cpu_ipi_pending[i] = 0; 156 } 157 158 /* Install an inter-CPU IPI for TLB invalidation */ 159 setidt(IPI_INVLTLB, IDTVEC(invltlb), 160 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 161 setidt(IPI_INVLPG, IDTVEC(invlpg), 162 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 163 setidt(IPI_INVLRNG, IDTVEC(invlrng), 164 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 165 166 /* Install an inter-CPU IPI for cache invalidation. */ 167 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 168 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 169 170 /* Install an inter-CPU IPI for all-CPU rendezvous */ 171 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 172 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 173 174 /* Install generic inter-CPU IPI handler */ 175 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 176 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 177 178 /* Install an inter-CPU IPI for CPU stop/restart */ 179 setidt(IPI_STOP, IDTVEC(cpustop), 180 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 181 182 /* Install an inter-CPU IPI for CPU suspend/resume */ 183 setidt(IPI_SUSPEND, IDTVEC(cpususpend), 184 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 185 186 /* Set boot_cpu_id if needed. */ 187 if (boot_cpu_id == -1) { 188 boot_cpu_id = PCPU_GET(apic_id); 189 cpu_info[boot_cpu_id].cpu_bsp = 1; 190 } else 191 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 192 ("BSP's APIC ID doesn't match boot_cpu_id")); 193 194 /* Probe logical/physical core configuration. */ 195 topo_probe(); 196 197 assign_cpu_ids(); 198 199 /* Start each Application Processor */ 200 start_all_aps(); 201 202 set_interrupt_apic_ids(); 203 } 204 205 /* 206 * AP CPU's call this to initialize themselves. 207 */ 208 void 209 init_secondary(void) 210 { 211 struct pcpu *pc; 212 struct i386tss *common_tssp; 213 struct region_descriptor r_gdt, r_idt; 214 int gsel_tss, myid, x; 215 u_int cr0; 216 217 /* bootAP is set in start_ap() to our ID. */ 218 myid = bootAP; 219 220 /* Get per-cpu data */ 221 pc = &__pcpu[myid]; 222 223 /* prime data page for it to use */ 224 pcpu_init(pc, myid, sizeof(struct pcpu)); 225 dpcpu_init(dpcpu, myid); 226 pc->pc_apic_id = cpu_apic_ids[myid]; 227 pc->pc_prvspace = pc; 228 pc->pc_curthread = 0; 229 pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid]; 230 231 fix_cpuid(); 232 233 gdt_segs[GPRIV_SEL].ssd_base = (int)pc; 234 gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp; 235 gdt_segs[GLDT_SEL].ssd_base = (int)ldt; 236 237 for (x = 0; x < NGDT; x++) { 238 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 239 } 240 241 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 242 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 243 lgdt(&r_gdt); /* does magic intra-segment return */ 244 245 r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; 246 r_idt.rd_base = (int)idt; 247 lidt(&r_idt); 248 249 lldt(_default_ldt); 250 PCPU_SET(currentldt, _default_ldt); 251 252 PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ - 253 VM86_STACK_SPACE); 254 255 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 256 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 257 common_tssp->tss_esp0 = PCPU_GET(trampstk); 258 common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 259 common_tssp->tss_ioopt = sizeof(struct i386tss) << 16; 260 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 261 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 262 ltr(gsel_tss); 263 264 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 265 PCPU_SET(copyout_buf, ap_copyout_buf); 266 267 /* 268 * Set to a known state: 269 * Set by mpboot.s: CR0_PG, CR0_PE 270 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 271 */ 272 cr0 = rcr0(); 273 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 274 load_cr0(cr0); 275 CHECK_WRITE(0x38, 5); 276 277 /* signal our startup to the BSP. */ 278 mp_naps++; 279 CHECK_WRITE(0x39, 6); 280 281 /* Spin until the BSP releases the AP's. */ 282 while (atomic_load_acq_int(&aps_ready) == 0) 283 ia32_pause(); 284 285 /* BSP may have changed PTD while we were waiting */ 286 invltlb(); 287 288 #if defined(I586_CPU) && !defined(NO_F00F_HACK) 289 lidt(&r_idt); 290 #endif 291 292 init_secondary_tail(); 293 } 294 295 /* 296 * start each AP in our list 297 */ 298 #define TMPMAP_START 1 299 static int 300 start_all_aps(void) 301 { 302 u_char mpbiosreason; 303 u_int32_t mpbioswarmvec; 304 int apic_id, cpu; 305 306 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 307 308 /* Remap lowest 1MB */ 309 IdlePTD[0] = IdlePTD[1]; 310 load_cr3(rcr3()); /* invalidate TLB */ 311 312 /* install the AP 1st level boot code */ 313 install_ap_tramp(); 314 315 /* save the current value of the warm-start vector */ 316 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 317 outb(CMOS_REG, BIOS_RESET); 318 mpbiosreason = inb(CMOS_DATA); 319 320 /* take advantage of the P==V mapping for PTD[0] for AP boot */ 321 322 /* start each AP */ 323 for (cpu = 1; cpu < mp_ncpus; cpu++) { 324 apic_id = cpu_apic_ids[cpu]; 325 326 /* allocate and set up a boot stack data page */ 327 bootstacks[cpu] = 328 (char *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE, 329 M_WAITOK | M_ZERO); 330 dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, 331 M_WAITOK | M_ZERO); 332 /* setup a vector to our boot code */ 333 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 334 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 335 outb(CMOS_REG, BIOS_RESET); 336 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 337 338 bootSTK = (char *)bootstacks[cpu] + kstack_pages * 339 PAGE_SIZE - 4; 340 bootAP = cpu; 341 342 ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); 343 ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); 344 345 /* attempt to start the Application Processor */ 346 CHECK_INIT(99); /* setup checkpoints */ 347 if (!start_ap(apic_id)) { 348 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 349 CHECK_PRINT("trace"); /* show checkpoints */ 350 /* better panic as the AP may be running loose */ 351 printf("panic y/n? [y] "); 352 if (cngetc() != 'n') 353 panic("bye-bye"); 354 } 355 CHECK_PRINT("trace"); /* show checkpoints */ 356 357 CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 358 } 359 360 /* Unmap lowest 1MB again */ 361 IdlePTD[0] = 0; 362 load_cr3(rcr3()); 363 364 /* restore the warmstart vector */ 365 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 366 367 outb(CMOS_REG, BIOS_RESET); 368 outb(CMOS_DATA, mpbiosreason); 369 370 /* number of APs actually started */ 371 return mp_naps; 372 } 373 374 /* 375 * load the 1st level AP boot code into base memory. 376 */ 377 378 /* targets for relocation */ 379 extern void bigJump(void); 380 extern void bootCodeSeg(void); 381 extern void bootDataSeg(void); 382 extern void MPentry(void); 383 extern u_int MP_GDT; 384 extern u_int mp_gdtbase; 385 386 static void 387 install_ap_tramp(void) 388 { 389 int x; 390 int size = *(int *) ((u_long) & bootMP_size); 391 vm_offset_t va = boot_address; 392 u_char *src = (u_char *) ((u_long) bootMP); 393 u_char *dst = (u_char *) va; 394 u_int boot_base = (u_int) bootMP; 395 u_int8_t *dst8; 396 u_int16_t *dst16; 397 u_int32_t *dst32; 398 399 KASSERT (size <= PAGE_SIZE, 400 ("'size' do not fit into PAGE_SIZE, as expected.")); 401 pmap_kenter(va, boot_address); 402 pmap_invalidate_page (kernel_pmap, va); 403 for (x = 0; x < size; ++x) 404 *dst++ = *src++; 405 406 /* 407 * modify addresses in code we just moved to basemem. unfortunately we 408 * need fairly detailed info about mpboot.s for this to work. changes 409 * to mpboot.s might require changes here. 410 */ 411 412 /* boot code is located in KERNEL space */ 413 dst = (u_char *) va; 414 415 /* modify the lgdt arg */ 416 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 417 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 418 419 /* modify the ljmp target for MPentry() */ 420 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 421 *dst32 = (u_int)MPentry; 422 423 /* modify the target for boot code segment */ 424 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 425 dst8 = (u_int8_t *) (dst16 + 1); 426 *dst16 = (u_int) boot_address & 0xffff; 427 *dst8 = ((u_int) boot_address >> 16) & 0xff; 428 429 /* modify the target for boot data segment */ 430 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 431 dst8 = (u_int8_t *) (dst16 + 1); 432 *dst16 = (u_int) boot_address & 0xffff; 433 *dst8 = ((u_int) boot_address >> 16) & 0xff; 434 } 435 436 /* 437 * This function starts the AP (application processor) identified 438 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 439 * to accomplish this. This is necessary because of the nuances 440 * of the different hardware we might encounter. It isn't pretty, 441 * but it seems to work. 442 */ 443 static int 444 start_ap(int apic_id) 445 { 446 int vector, ms; 447 int cpus; 448 449 /* calculate the vector */ 450 vector = (boot_address >> 12) & 0xff; 451 452 /* used as a watchpoint to signal AP startup */ 453 cpus = mp_naps; 454 455 ipi_startup(apic_id, vector); 456 457 /* Wait up to 5 seconds for it to start. */ 458 for (ms = 0; ms < 5000; ms++) { 459 if (mp_naps > cpus) 460 return 1; /* return SUCCESS */ 461 DELAY(1000); 462 } 463 return 0; /* return FAILURE */ 464 } 465