/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cpumask.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/tls.h>
#include <sys/types.h>
#include <sys/vmm.h>

#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <sys/mplock2.h>
#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/globaldata.h>
#include <machine/md_var.h>
#include <machine/pmap.h>
#include <machine/smp.h>
#include <machine/tls.h>
#include <machine/param.h>

#include <unistd.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>

extern pt_entry_t *KPTphys;

extern int vmm_enabled;

/* cpus currently halted via stop_cpus(); bits cleared by restart_cpus() */
volatile cpumask_t stopped_cpus;
/* which cpus are ready for IPIs etc? */
cpumask_t smp_active_mask = CPUMASK_INITIALIZER_ONLYONE;
/*
 * NOTE(review): boot_address is never assigned in this file (stays 0);
 * it is only passed to start_all_aps(), which ignores its argument.
 */
static int boot_address;
/* which cpus have been started */
static cpumask_t smp_startup_mask = CPUMASK_INITIALIZER_ONLYONE;
/* set by ap_finish() to release the APs spinning in ap_init() */
static int mp_finish;

/* Local data for detecting CPU TOPOLOGY */
static int core_bits = 0;
static int logical_CPU_bits = 0;

/* function prototypes XXX these should go elsewhere */
void bootstrap_idle(void);
void single_cpu_ipi(int, int, int);
void selected_cpu_ipi(cpumask_t, int, int);
#if 0
void ipi_handler(int);
#endif

pt_entry_t *SMPpt;

/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
static int bootAP;


/* XXX these need to go into the appropriate header file */
static int start_all_aps(u_int);
void init_secondary(void);
void *start_ap(void *);

/*
 * Get SMP fully working before we start initializing devices.
 */
static
void
ap_finish(void)
{
	mp_finish = 1;
	if (bootverbose)
		kprintf("Finish MP startup\n");

	/* build our map of 'other' CPUs */
	mycpu->gd_other_cpus = smp_startup_mask;
	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);

	/*
	 * Let the other cpu's finish initializing and build their map
	 * of 'other' CPUs.
	 */
	rel_mplock();
	while (CPUMASK_CMPMASKNEQ(smp_active_mask,smp_startup_mask)) {
		DELAY(100000);
		cpu_lfence();
	}

	while (try_mplock() == 0)
		DELAY(100000);
	if (bootverbose)
		kprintf("Active CPU Mask: %08lx\n",
			(long)CPUMASK_LOWMASK(smp_active_mask));
}

SYSINIT(finishsmp, SI_BOOT2_FINISH_SMP, SI_ORDER_FIRST, ap_finish, NULL);

/*
 * pthread entry point for an AP: initialize per-cpu state, then drop
 * into the idle bootstrap.  Never returns.
 */
void *
start_ap(void *arg __unused)
{
	init_secondary();
	setrealcpu();
	bootstrap_idle();

	return(NULL); /* NOTREACHED */
}

/* storage for AP thread IDs */
pthread_t ap_tids[MAXCPU];

int naps;

/*
 * Size the system (ncpus from the -n option via optcpus), set up
 * cpu0's IPI queues, and launch the APs.
 */
void
mp_start(void)
{
	size_t ipiq_size;
	int shift;

	ncpus = optcpus;
	naps = ncpus - 1;

	for (shift = 0; (1 << shift) <= ncpus; ++shift)
		;
	--shift;

	/* ncpus_fit -- ncpus rounded up to the nearest power of 2 */
	if ((1 << shift) < ncpus)
		++shift;
	ncpus_fit = 1 << shift;
	ncpus_fit_mask = ncpus_fit - 1;

	malloc_reinit_ncpus();

	/*
	 * cpu0 initialization
	 */
	ipiq_size = sizeof(struct lwkt_ipiq) * ncpus;
	mycpu->gd_ipiq = (void *)kmem_alloc(kernel_map, ipiq_size,
					    VM_SUBSYS_IPIQ);
	bzero(mycpu->gd_ipiq, ipiq_size);

	/* initialize arc4random. */
	arc4_init_pcpu(0);

	/*
	 * cpu 1-(n-1)
	 */
	start_all_aps(boot_address);

}

/*
 * Print the BSP/AP roster to the console.
 */
void
mp_announce(void)
{
	int x;

	kprintf("DragonFly/MP: Multiprocessor\n");
	kprintf(" cpu0 (BSP)\n");

	for (x = 1; x <= naps; ++x)
		kprintf(" cpu%d (AP)\n", x);
}

/*
 * Send an IPI to a single cpu.  In the vkernel an IPI is modeled as
 * SIGUSR1 delivered to the target cpu's pthread; cpus not yet in
 * smp_active_mask are silently skipped (see ap_init()'s interlock).
 */
void
cpu_send_ipiq(int dcpu)
{
	if (CPUMASK_TESTBIT(smp_active_mask, dcpu)) {
		if (pthread_kill(ap_tids[dcpu], SIGUSR1) != 0)
			panic("pthread_kill failed in cpu_send_ipiq");
	}
#if 0
	panic("XXX cpu_send_ipiq()");
#endif
}

/*
 * Vectored IPI to a single cpu -- unimplemented stub, only logs.
 */
void
single_cpu_ipi(int cpu, int vector, int delivery_mode)
{
	kprintf("XXX single_cpu_ipi\n");
}

/*
 * Deliver a vectored IPI to every cpu in the target mask by iterating
 * the set bits (consumes a local copy of the mask).
 */
void
selected_cpu_ipi(cpumask_t target, int vector, int delivery_mode)
{
	crit_enter();
	while (CPUMASK_TESTNZERO(target)) {
		int n = BSFCPUMASK(target);
		CPUMASK_NANDBIT(target, n);
		single_cpu_ipi(n, vector, delivery_mode);
	}
	crit_exit();
}

/*
 * Halt the cpus in the map (restricted to active cpus): mark each in
 * stopped_cpus, then signal its thread with SIGXCPU.  Always returns 1.
 */
int
stop_cpus(cpumask_t map)
{
	CPUMASK_ANDMASK(map, smp_active_mask);

	crit_enter();
	while (CPUMASK_TESTNZERO(map)) {
		int n = BSFCPUMASK(map);
		CPUMASK_NANDBIT(map, n);
		ATOMIC_CPUMASK_ORBIT(stopped_cpus, n);
		if (pthread_kill(ap_tids[n], SIGXCPU) != 0)
			panic("stop_cpus: pthread_kill failed");
	}
	crit_exit();
#if 0
	panic("XXX stop_cpus()");
#endif

	return(1);
}

/*
 * Resume cpus previously halted by stop_cpus(): clear each cpu's bit in
 * stopped_cpus, then signal its thread with SIGXCPU again.  Always
 * returns 1.
 */
int
restart_cpus(cpumask_t map)
{
	CPUMASK_ANDMASK(map, smp_active_mask);

	crit_enter();
	while (CPUMASK_TESTNZERO(map)) {
		int n = BSFCPUMASK(map);
		CPUMASK_NANDBIT(map, n);
		ATOMIC_CPUMASK_NANDBIT(stopped_cpus, n);
		if (pthread_kill(ap_tids[n], SIGXCPU) != 0)
			panic("restart_cpus: pthread_kill failed");
	}
	crit_exit();
#if 0
	panic("XXX restart_cpus()");
#endif

	return(1);
}

/*
 * Second-stage AP initialization, run on the AP itself: announce
 * startup to the BSP, wait for the MP-finish interlock, take the BGL,
 * then join smp_active_mask and drain any queued IPIQ messages.
 */
void
ap_init(void)
{
	/*
	 * Adjust smp_startup_mask to signal the BSP that we have started
	 * up successfully. Note that we do not yet hold the BGL. The BSP
	 * is waiting for our signal.
	 *
	 * We can't set our bit in smp_active_mask yet because we are holding
	 * interrupts physically disabled and remote cpus could deadlock
	 * trying to send us an IPI.
	 */
	ATOMIC_CPUMASK_ORBIT(smp_startup_mask, mycpu->gd_cpuid);
	cpu_mfence();

	/*
	 * Interlock for finalization. Wait until mp_finish is non-zero,
	 * then get the MP lock.
	 *
	 * Note: We are in a critical section.
	 *
	 * Note: we are the idle thread, we can only spin.
	 *
	 * Note: The load fence is memory volatile and prevents the compiler
	 * from improperly caching mp_finish, and the cpu from improperly
	 * caching it.
	 */

	while (mp_finish == 0) {
		cpu_lfence();
		DELAY(500000);
	}
	while (try_mplock() == 0)
		DELAY(100000);

	/* BSP may have changed PTD while we're waiting for the lock */
	cpu_invltlb();

	/* Build our map of 'other' CPUs. */
	mycpu->gd_other_cpus = smp_startup_mask;
	CPUMASK_NANDBIT(mycpu->gd_other_cpus, mycpu->gd_cpuid);

	kprintf("SMP: AP CPU #%d Launched!\n", mycpu->gd_cpuid);


	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();
	/*
	 * Once we go active we must process any IPIQ messages that may
	 * have been queued, because no actual IPI will occur until we
	 * set our bit in the smp_active_mask. If we don't the IPI
	 * message interlock could be left set which would also prevent
	 * further IPIs.
	 *
	 * The idle loop doesn't expect the BGL to be held and while
	 * lwkt_switch() normally cleans things up this is a special case
	 * because we returning almost directly into the idle loop.
	 *
	 * The idle thread is never placed on the runq, make sure
	 * nothing we've done put it there.
	 */
	KKASSERT(get_mplock_count(curthread) == 1);
	ATOMIC_CPUMASK_ORBIT(smp_active_mask, mycpu->gd_cpuid);

	mdcpu->gd_fpending = 0;
	mdcpu->gd_ipending = 0;
	initclocks_pcpu();	/* clock interrupts (via IPIs) */

	/*
	 * Since we may have cleaned up the interrupt triggers, manually
	 * process any pending IPIs before exiting our critical section.
	 * Once the critical section has exited, normal interrupt processing
	 * may occur.
	 */
	atomic_swap_int(&mycpu->gd_npoll, 0);
	lwkt_process_ipiq();

	/*
	 * Releasing the mp lock lets the BSP finish up the SMP init
	 */
	rel_mplock();
	KKASSERT((curthread->td_flags & TDF_RUNQ) == 0);
}

/*
 * First-stage AP initialization, run on the AP thread before ap_init():
 * bind %gs to this cpu's private space so the mycpu/mdcpu macros work.
 * Reads the cpu id from the global bootAP set by start_all_aps().
 */
void
init_secondary(void)
{
	int myid = bootAP;
	struct mdglobaldata *md;
	struct privatespace *ps;

	ps = &CPU_prvspace[myid];

	KKASSERT(ps->mdglobaldata.mi.gd_prvspace == ps);

	/*
	 * Setup the %gs for cpu #n. The mycpu macro works after this
	 * point. Note that %fs is used by pthreads.
	 */
	tls_set_gs(&CPU_prvspace[myid], sizeof(struct privatespace));

	md = mdcpu;	/* loaded through %gs:0 (mdglobaldata.mi.gd_prvspace)*/

	/* JG */
	md->gd_common_tss.tss_rsp0 = 0;	/* not used until after switch */
	//md->gd_common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	//md->gd_common_tss.tss_ioopt = (sizeof md->gd_common_tss) << 16;

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
}

/*
 * Start all the APs.  For each AP: back its private space and idle
 * stack with pages, initialize its globaldata and IPI queues, then
 * create its pthread and spin until the AP flags itself started in
 * smp_startup_mask.  Returns the number of APs started (ncpus - 1).
 *
 * NOTE(review): boot_addr is unused here; callers pass the never-set
 * boot_address global.
 */
static int
start_all_aps(u_int boot_addr)
{
	int x, i;
	struct mdglobaldata *gd;
	struct privatespace *ps;
	vm_page_t m;
	vm_offset_t va;
	void *stack;
	pthread_attr_t attr;
	size_t ipiq_size;
#if 0
	struct lwp_params params;
#endif

	/*
	 * needed for ipis to initial thread
	 * FIXME: rename ap_tids?
	 */
	ap_tids[0] = pthread_self();
	pthread_attr_init(&attr);

	vm_object_hold(kernel_object);
	for (x = 1; x <= naps; ++x) {
		/* Allocate space for the CPU's private space. */
		for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) {
			va =(vm_offset_t)&CPU_prvspace[x].mdglobaldata + i;
			m = vm_page_alloc(kernel_object, va, VM_ALLOC_SYSTEM);
			pmap_kenter_quick(va, m->phys_addr);
		}

		for (i = 0; i < sizeof(CPU_prvspace[x].idlestack); i += PAGE_SIZE) {
			va =(vm_offset_t)&CPU_prvspace[x].idlestack + i;
			m = vm_page_alloc(kernel_object, va, VM_ALLOC_SYSTEM);
			pmap_kenter_quick(va, m->phys_addr);
		}

		gd = &CPU_prvspace[x].mdglobaldata;	/* official location */
		bzero(gd, sizeof(*gd));
		gd->mi.gd_prvspace = ps = &CPU_prvspace[x];

		/* prime data page for it to use */
		mi_gdinit(&gd->mi, x);
		cpu_gdinit(gd, x);

#if 0
		gd->gd_CMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE1);
		gd->gd_CMAP2 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE2);
		gd->gd_CMAP3 = pmap_kpte((vm_offset_t)CPU_prvspace[x].CPAGE3);
		gd->gd_PMAP1 = pmap_kpte((vm_offset_t)CPU_prvspace[x].PPAGE1);
		gd->gd_CADDR1 = ps->CPAGE1;
		gd->gd_CADDR2 = ps->CPAGE2;
		gd->gd_CADDR3 = ps->CPAGE3;
		gd->gd_PADDR1 = (vpte_t *)ps->PPAGE1;
#endif

		ipiq_size = sizeof(struct lwkt_ipiq) * (naps + 1);
		gd->mi.gd_ipiq = (void *)kmem_alloc(kernel_map, ipiq_size,
						    VM_SUBSYS_IPIQ);
		bzero(gd->mi.gd_ipiq, ipiq_size);

		/* initialize arc4random. */
		arc4_init_pcpu(x);

		/*
		 * Setup the AP boot stack
		 */
		bootSTK = &ps->idlestack[UPAGES*PAGE_SIZE/2];
		bootAP = x;

		/*
		 * Setup the AP's lwp, this is the 'cpu'
		 *
		 * We have to make sure our signals are masked or the new LWP
		 * may pick up a signal that it isn't ready for yet. SMP
		 * startup occurs after SI_BOOT2_LEAVE_CRIT so interrupts
		 * have already been enabled.
		 */
		cpu_disable_intr();

		if (vmm_enabled) {
			stack = mmap(NULL, KERNEL_STACK_SIZE,
				     PROT_READ|PROT_WRITE|PROT_EXEC,
				     MAP_ANON, -1, 0);
			if (stack == MAP_FAILED) {
				panic("Unable to allocate stack for thread %d\n", x);
			}
			pthread_attr_setstack(&attr, stack, KERNEL_STACK_SIZE);
		}

		pthread_create(&ap_tids[x], &attr, start_ap, NULL);
		cpu_enable_intr();

		while (CPUMASK_TESTBIT(smp_startup_mask, x) == 0) {
			cpu_lfence(); /* XXX spin until the AP has started */
			DELAY(1000);
		}
	}
	vm_object_drop(kernel_object);
	pthread_attr_destroy(&attr);

	return(ncpus - 1);
}

/*
 * CPU TOPOLOGY DETECTION FUNCTIONS.
 */

/*
 * Topology bit-field widths come straight from the vkernel's -b/-B
 * command line arguments rather than being probed from hardware.
 */
void
detect_cpu_topology(void)
{
	logical_CPU_bits = vkernel_b_arg;
	core_bits = vkernel_B_arg;
}

/* Chip (package) id: apicid with the logical and core fields shifted off. */
int
get_chip_ID(int cpuid)
{
	return get_apicid_from_cpuid(cpuid) >>
	    (logical_CPU_bits + core_bits);
}

/* Same as get_chip_ID() but starting from an apicid. */
int
get_chip_ID_from_APICID(int apicid)
{
	return apicid >> (logical_CPU_bits + core_bits);
}

/* Core index within the chip: the core_bits field of the apicid. */
int
get_core_number_within_chip(int cpuid)
{
	return ((get_apicid_from_cpuid(cpuid) >> logical_CPU_bits) &
	    ((1 << core_bits) - 1));
}

/* Logical (hyperthread) index within the core: the low logical_CPU_bits. */
int
get_logical_CPU_number_within_core(int cpuid)
{
	return (get_apicid_from_cpuid(cpuid) &
	    ((1 << logical_CPU_bits) - 1));
}